mneme-cli 0.5.1__tar.gz → 0.5.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. {mneme_cli-0.5.1 → mneme_cli-0.5.3}/AGENTS.md +387 -1
  2. {mneme_cli-0.5.1 → mneme_cli-0.5.3}/CHANGELOG.md +65 -0
  3. {mneme_cli-0.5.1 → mneme_cli-0.5.3}/PKG-INFO +94 -17
  4. {mneme_cli-0.5.1 → mneme_cli-0.5.3}/README.md +92 -15
  5. {mneme_cli-0.5.1 → mneme_cli-0.5.3}/mneme/__init__.py +1 -1
  6. {mneme_cli-0.5.1 → mneme_cli-0.5.3}/mneme/core.py +67 -25
  7. {mneme_cli-0.5.1 → mneme_cli-0.5.3}/pyproject.toml +3 -1
  8. {mneme_cli-0.5.1 → mneme_cli-0.5.3}/tests/test_core.py +145 -2
  9. {mneme_cli-0.5.1 → mneme_cli-0.5.3}/CLAUDE.md +0 -0
  10. {mneme_cli-0.5.1 → mneme_cli-0.5.3}/CODER.md +0 -0
  11. {mneme_cli-0.5.1 → mneme_cli-0.5.3}/EXAMPLES.md +0 -0
  12. {mneme_cli-0.5.1 → mneme_cli-0.5.3}/FEATURES.md +0 -0
  13. {mneme_cli-0.5.1 → mneme_cli-0.5.3}/LICENSE +0 -0
  14. {mneme_cli-0.5.1 → mneme_cli-0.5.3}/MANIFEST.in +0 -0
  15. {mneme_cli-0.5.1 → mneme_cli-0.5.3}/mneme/__main__.py +0 -0
  16. {mneme_cli-0.5.1 → mneme_cli-0.5.3}/mneme/config.py +0 -0
  17. {mneme_cli-0.5.1 → mneme_cli-0.5.3}/mneme/profiles/eu-mdr.md +0 -0
  18. {mneme_cli-0.5.1 → mneme_cli-0.5.3}/mneme/profiles/iso-13485.md +0 -0
  19. {mneme_cli-0.5.1 → mneme_cli-0.5.3}/mneme/profiles/mappings/dds.json +0 -0
  20. {mneme_cli-0.5.1 → mneme_cli-0.5.3}/mneme/profiles/mappings/requirements.json +0 -0
  21. {mneme_cli-0.5.1 → mneme_cli-0.5.3}/mneme/profiles/mappings/risk-register.json +0 -0
  22. {mneme_cli-0.5.1 → mneme_cli-0.5.3}/mneme/profiles/mappings/test-cases.json +0 -0
  23. {mneme_cli-0.5.1 → mneme_cli-0.5.3}/mneme/profiles/mappings/user-needs.json +0 -0
  24. {mneme_cli-0.5.1 → mneme_cli-0.5.3}/mneme/search.py +0 -0
  25. {mneme_cli-0.5.1 → mneme_cli-0.5.3}/mneme/server.py +0 -0
  26. {mneme_cli-0.5.1 → mneme_cli-0.5.3}/mneme/templates/workspace/.gitignore +0 -0
  27. {mneme_cli-0.5.1 → mneme_cli-0.5.3}/mneme/templates/workspace/AGENTS.md +0 -0
  28. {mneme_cli-0.5.1 → mneme_cli-0.5.3}/mneme/templates/workspace/README.md +0 -0
  29. {mneme_cli-0.5.1 → mneme_cli-0.5.3}/mneme/templates/workspace/inbox/.gitkeep +0 -0
  30. {mneme_cli-0.5.1 → mneme_cli-0.5.3}/mneme/templates/workspace/index.md +0 -0
  31. {mneme_cli-0.5.1 → mneme_cli-0.5.3}/mneme/templates/workspace/log.md +0 -0
  32. {mneme_cli-0.5.1 → mneme_cli-0.5.3}/mneme/templates/workspace/profiles/README.md +0 -0
  33. {mneme_cli-0.5.1 → mneme_cli-0.5.3}/mneme/templates/workspace/profiles/mappings/.gitkeep +0 -0
  34. {mneme_cli-0.5.1 → mneme_cli-0.5.3}/mneme/templates/workspace/schema/entities.json +0 -0
  35. {mneme_cli-0.5.1 → mneme_cli-0.5.3}/mneme/templates/workspace/schema/graph.json +0 -0
  36. {mneme_cli-0.5.1 → mneme_cli-0.5.3}/mneme/templates/workspace/schema/tags.json +0 -0
  37. {mneme_cli-0.5.1 → mneme_cli-0.5.3}/mneme/templates/workspace/sources/.gitkeep +0 -0
  38. {mneme_cli-0.5.1 → mneme_cli-0.5.3}/mneme/templates/workspace/wiki/_templates/page.md +0 -0
  39. {mneme_cli-0.5.1 → mneme_cli-0.5.3}/mneme/ui.html +0 -0
  40. {mneme_cli-0.5.1 → mneme_cli-0.5.3}/mneme_cli.egg-info/SOURCES.txt +0 -0
  41. {mneme_cli-0.5.1 → mneme_cli-0.5.3}/setup.cfg +0 -0
  42. {mneme_cli-0.5.1 → mneme_cli-0.5.3}/tests/__init__.py +0 -0
  43. {mneme_cli-0.5.1 → mneme_cli-0.5.3}/tests/test_agent_loop.py +0 -0
  44. {mneme_cli-0.5.1 → mneme_cli-0.5.3}/tests/test_bug_regressions.py +0 -0
  45. {mneme_cli-0.5.1 → mneme_cli-0.5.3}/tests/test_ingest_csv.py +0 -0
  46. {mneme_cli-0.5.1 → mneme_cli-0.5.3}/tests/test_profile.py +0 -0
  47. {mneme_cli-0.5.1 → mneme_cli-0.5.3}/tests/test_schema_search.py +0 -0
  48. {mneme_cli-0.5.1 → mneme_cli-0.5.3}/tests/test_search.py +0 -0
  49. {mneme_cli-0.5.1 → mneme_cli-0.5.3}/tests/test_tornado_lint.py +0 -0
  50. {mneme_cli-0.5.1 → mneme_cli-0.5.3}/tests/test_trace.py +0 -0
@@ -280,6 +280,93 @@ plain-markdown `<details>` fallback so the page is useful outside
280
280
  Obsidian. Run after a large ingest, or whenever the wiki's shape
281
281
  changes meaningfully.
282
282
 
283
+ ### 3.9 TRACE — linking the full V-model chain
284
+
285
+ The trace chain a notified body expects has two legs that both terminate
286
+ at code and tests:
287
+
288
+ ```
289
+ UN  ──implemented-by──┐
290
+                       ├──> REQ ──detailed-in──> DDS ──implemented-in──> codebase
291
+ RMA ──mitigated-by────┘                          └──verified-by───> tests
292
+ ```
293
+
294
+ The first three links (UN→REQ, RMA→REQ, REQ→DDS) are created
295
+ automatically by the CSV mappings in `profiles/mappings/` (or by
296
+ `mneme trace add` when ingesting structured sources). The last two
297
+ links (DDS→codebase, DDS→tests) close the V-model and are your
298
+ responsibility when a user passes you one or more repositories.
299
+
300
+ **When the user passes you a repo path, you must:**
301
+
302
+ ```bash
303
+ # 1. Inventory: what code modules / test files exist?
304
+ mneme scan-repo <repo-path> <client>
305
+ # → reports which wiki pages reference the repo's modules, and which do not.
306
+
307
+ # 2. For each DDS page that corresponds to a code module, add the link.
308
+ # The target is a git URL or an absolute repo path; mneme treats it
309
+ # as an opaque string (not a wiki slug) — the target may live outside
310
+ # the workspace.
311
+ mneme trace add <client>/dds-cyb-001 \
312
+ "github.com/<org>/<repo>/blob/main/src/auth/password_policy.py" \
313
+ implemented-in
314
+
315
+ # 3. For each DDS page that has a corresponding test, add the link.
316
+ # The test target can be a wiki page (for test-plan docs) or an
317
+ # external path (for a test file in a repo).
318
+ mneme trace add <client>/dds-cyb-001 <client>/test-auth-001 verified-by
319
+ mneme trace add <client>/dds-cyb-001 \
320
+ "github.com/<org>/<repo>/blob/main/tests/test_password_policy.py" \
321
+ verified-by
322
+ ```
323
+
324
+ Do this for every DDS page that has implementing code or a verifying
325
+ test. When there are tens or hundreds of links to create (typical for
326
+ a real medical-device codebase), script the calls instead of typing them:
327
+
328
+ ```bash
329
+ # Batch approach — the agent parses the repo, maps DDS → files,
330
+ # then writes a shell script of `mneme trace add` lines and runs it.
331
+ # mneme has no bulk-trace-add subcommand yet; scripting is the way.
332
+ for pair in dds-cyb-001:src/auth/password_policy.py \
333
+ dds-cyb-002:src/auth/mfa.py \
334
+ dds-cyb-003:src/auth/rate_limiter.py; do
335
+ dds=${pair%%:*}; file=${pair##*:}
336
+ mneme trace add <client>/$dds "<repo-url>/$file" implemented-in
337
+ done
338
+ ```
339
+
340
+ **Verify the chain is now complete:**
341
+
342
+ ```bash
343
+ mneme trace gaps <client>
344
+ # → should report 0 hazards without mitigation, 0 DDS without
345
+ # implementation link, 0 DDS without verification link
346
+
347
+ mneme trace show <client>/un-001
348
+ # → UN.001
349
+ # implemented-by -> REQ.SYS.001
350
+ # detailed-in -> DDS.CYB.001
351
+ # implemented-in -> github.com/.../password_policy.py
352
+ # verified-by -> github.com/.../test_password_policy.py
353
+ ```
354
+
355
+ **Relationship vocabulary — use exactly these strings:**
356
+
357
+ | Relationship | From → To | Semantics |
358
+ |---|---|---|
359
+ | `implemented-by` | UN → REQ | The user need is met by this requirement |
360
+ | `mitigated-by` | RMA → REQ | The hazard is mitigated by this requirement |
361
+ | `derived-from` | REQ → UN / REQ → higher-level REQ | Parent requirement |
362
+ | `detailed-in` | REQ → DDS | The requirement is elaborated by this design spec |
363
+ | `implemented-in` | DDS → codebase | The design spec is realised by this source file / module |
364
+ | `verified-by` | DDS → test / REQ → test | The spec/requirement is verified by this test |
365
+ | `validated-by` | DDS → clinical/usability study | Validation (not verification) evidence |
366
+
367
+ Stick to this vocabulary. Custom relationships confuse downstream
368
+ matrix exports and break the default `trace gaps` heuristics.
369
+
283
370
  ---
284
371
 
285
372
  ## 4. Profiles and the writing-style contract
@@ -572,7 +659,306 @@ file.
572
659
  Stop conditions: inbox is empty, `mneme stats` shows a plausible page
573
660
  count, and `mneme lint` reports no critical issues.
574
661
 
575
- ### 6.6 Pre-submission readiness check before sending to a notified body
662
+ ### 6.6 Close the V-model by linking DDS to codebase and tests
663
+
664
+ The user has just handed you one or more repositories. Your job is to
665
+ connect every DDS page to the implementing source file(s) and the
666
+ verifying test file(s) so `mneme trace show` walks end-to-end from a
667
+ user need / hazard all the way to the exact source file and the exact
668
+ test that exercises it.
669
+
670
+ ```
671
+ 1. mneme profile show # sanity check
672
+ 2. mneme trace matrix <client> # baseline — which DDS exist?
673
+ 3. For each repo the user passes:
674
+ a. mneme scan-repo <repo-path> <client> # surface module gaps
675
+ b. Read the repo tree and README yourself.
676
+ Build a mapping: DDS ID -> [source files]
677
+ DDS ID -> [test files]
678
+ Prefer explicit evidence (comments referencing the DDS ID,
679
+ module/function names that mirror the DDS title, docstrings
680
+ that cite the requirement). When evidence is weak, flag the
681
+ DDS as ambiguous and surface it — do not guess.
682
+ 4. For each confident (DDS, file) pair:
683
+ mneme trace add <client>/<dds-slug> "<repo-url-or-path>/<file>" implemented-in
684
+ mneme trace add <client>/<dds-slug> "<repo-url-or-path>/<test-file>" verified-by
685
+ Batch these in a shell loop — there is no bulk-trace-add subcommand.
686
+ 5. mneme trace gaps <client> # should trend to zero
687
+ 6. mneme trace show <client>/un-001 # spot-check: full chain
688
+ from UN to test file?
689
+ 7. mneme trace matrix <client> --csv --out trace-matrix.csv
690
+ # DHF-ready export
691
+ ```
692
+
693
+ Stop conditions: (a) every DDS page either has both `implemented-in`
694
+ and `verified-by` trace links OR is explicitly flagged ambiguous in a
695
+ report to the user, AND (b) `trace gaps` reports zero open chains for every DDS page not flagged ambiguous.
696
+
697
+ Hard rules:
698
+ - Do not fabricate file paths. If the repo has no file matching a DDS,
699
+ report the gap and stop — the user must either point you at another
700
+ repo or add the link manually.
701
+ - Trace targets for external files are opaque strings. Use a stable
702
+ form the team can resolve later (a git URL with a pinned commit is
703
+ ideal; a bare relative path is fine when the repo lives alongside
704
+ the workspace).
705
+ - Never rewrite a DDS page's body to embed the code link. The link
706
+ lives in `schema/traceability.json` only. Wiki pages stay prose.
707
+
708
+ ### 6.7 Ingest a code repo into the wiki as searchable module summaries
709
+
710
+ The user has handed you a code repo. Your job is to produce one wiki page
711
+ per logical module so future agents can answer "how does this codebase do
712
+ X?" through `mneme search` instead of re-reading the source.
713
+
714
+ This is the foundation for any later code-aware work (style-matched
715
+ extension, refactor planning, gap analysis). It does not modify the repo —
716
+ read-only ingestion.
717
+
718
+ ```
719
+ 1. Walk <REPO_PATH>. Skip: .git, node_modules, .venv, dist, build,
720
+ __pycache__, anything matched by .gitignore.
721
+
722
+ 2. Group files into logical modules. Heuristics:
723
+ - A directory containing __init__.py / mod.rs / index.ts / go.mod
724
+ is one module.
725
+ - A standalone script with no siblings is one module.
726
+ - Tests (tests/ or *_test.* alongside) are part of the module they
727
+ test, not separate modules.
728
+
729
+ 3. For each module, write a summary file at
730
+ /tmp/mneme-summaries/<module-path>.md
731
+ with this exact frontmatter and section structure:
732
+
733
+ ---
734
+ title: <Module Name>
735
+ type: code-summary
736
+ client: <CLIENT_SLUG>
737
+ sources:
738
+ - <repo-relative path of every file in the module>
739
+ tags:
740
+ - code
741
+ - <language>
742
+ - <one-or-two-domain-tags>
743
+ ---
744
+
745
+ ## Purpose
746
+ One paragraph in plain English. No code.
747
+
748
+ ## Public API
749
+ List of exported functions / classes / types, one line each.
750
+ Format: `name(args) -> return_type` then a sentence.
751
+
752
+ ## Key data structures
753
+ Non-trivial types or schemas this module owns. Skip if none.
754
+
755
+ ## Dependencies
756
+ - Internal: which other modules in this repo it imports
757
+ - External: which libraries (with pinned version if any)
758
+
759
+ ## Tests
760
+ Path to test file(s) + one sentence on coverage shape.
761
+
762
+ ## Conventions observed
763
+ 3-5 bullets: error style, async/sync, naming, comment density, etc.
764
+
765
+ 4. For files too large to read in one pass:
766
+ a. Read the first 200 lines.
767
+ b. Read the last 100 lines.
768
+ c. If the file has a clear table-of-contents (a __all__, an exports
769
+ block, a class index near the top), use it to guide which middle
770
+ sections to read in additional 200-line chunks.
771
+ d. State in the summary's Purpose section that this was a partial
772
+ read, and tag the page `partial-read` so a future pass can
773
+ revisit.
774
+
775
+ 5. Ingest the summaries in one pass:
776
+ mneme ingest-dir /tmp/mneme-summaries <CLIENT_SLUG> --recursive --flat
777
+
778
+ Use --flat: the summaries already encode their path in the slug, and
779
+ they don't live under sources/<CLIENT_SLUG>/ so subpath auto-detection
780
+ won't help.
781
+
782
+ 6. Smoke-test:
783
+ mneme stats
784
+ mneme search "<a real concept from the repo>" --client <CLIENT_SLUG>
785
+ mneme tags list
786
+ ```
787
+
788
+ Stop conditions: every module in the repo (modulo the skip list) has a
789
+ wiki summary, and a search for a known concept returns the right module.
790
+
791
+ Hard rules:
792
+ - Do not generate summaries for files you did not actually read. Partial
793
+ reads must be tagged `partial-read` in the page's frontmatter.
794
+ - Do not speculate. If a module's purpose is unclear from the code, write
795
+ "unclear, needs human review" and tag the page `needs-review`.
796
+ - Do not modify the repo. Read-only.
797
+ - Keep summaries under 300 lines. They are pointers, not replacements.
798
+ - One module = one wiki page. Do not split a module across pages, and
799
+ do not merge unrelated modules into one page.
800
+
801
+ Report when done: total modules summarized, count tagged `partial-read`,
802
+ count tagged `needs-review`, directories skipped and why, and the
803
+ search queries you used to verify the ingest.
804
+
805
+ ### 6.8 Augment a wiki page with knowledge from ingested code summaries
806
+
807
+ Pre-condition: 6.7 has run, so the repo is in the wiki as `code-summary`
808
+ pages. You now have a target wiki page (sparse, half-finished, or
809
+ explicitly marked TBD) and you want to enrich it with sections that
810
+ draw on the code knowledge — in the page's existing voice, with every
811
+ claim cited.
812
+
813
+ This is selective augmentation, not regeneration. Existing prose is sacred.
814
+
815
+ ```
816
+ 1. Read the target page in full at <WORKSPACE>/wiki/<client>/<page>.md.
817
+ Note: existing tone, sentence length, citation density, heading depth,
818
+ table-of-contents shape. These define the local style you must match.
819
+
820
+ 2. Decide what to add. Two paths:
821
+ a. Human-driven: the user told you "add a Performance Characteristics
822
+ section drawing latency data from the codebase." Skip to step 3.
823
+ b. Agent-driven: gap analysis. Compare the target's actual sections
824
+ against (i) the active profile's expected sections for this
825
+ doc-type (run `mneme profile show`), and (ii) topics covered by
826
+ code-summary pages that the target does not cite. Propose 1-5
827
+ candidate sections to the human and wait for confirmation. Do not
828
+ add sections without confirmation.
829
+
830
+ 3. For each agreed section, gather evidence:
831
+ mneme search "<topic keywords>" --client <client> -k 20
832
+ Prefer hits with the `code` tag for implementation details. Prefer
833
+ regulatory wiki pages for context and definitions. Read the top hits
834
+ in full before writing.
835
+
836
+ 4. Draft the section. Hard requirements:
837
+ - Match the target's local style. Local consistency wins over the
838
+ active profile's global rules within a single page.
839
+ - Every non-trivial claim cites its source as
840
+ `(wiki: <client>/<page>)` or `(source: <repo-relative-path>)`.
841
+ - When evidence is insufficient for a claim, do not invent it.
842
+ Insert `[TO ADD REF]` and continue.
843
+
844
+ 5. Insert at the structurally correct location. Read the target's TOC.
845
+ The new section's heading depth and ordering must follow the
846
+ document's own logic, not your intuition.
847
+
848
+ 6. Update the target's frontmatter:
849
+ - Append every newly cited source to the `sources:` list.
850
+ - Bump `updated:` to today.
851
+ - If the page was previously marked draft / TBD and is now complete,
852
+ update `confidence:` accordingly.
853
+
854
+ 7. Re-ingest the target so search picks up the new content. Two options:
855
+ a. If the page has a corresponding source file in sources/<client>/,
856
+ mirror your wiki edits back to it and run:
857
+ mneme resync sources/<client>/<path-to-source> <client>
858
+ b. Otherwise, edit the wiki page directly and run:
859
+ mneme reindex
860
+ ```
861
+
862
+ Stop conditions: every agreed section is either (a) written with full
863
+ citations, or (b) explicitly flagged as evidence-insufficient and
864
+ reported back to the human. The page passes
865
+ `mneme validate writing-style <client>/<page>` against the active
866
+ profile.
867
+
868
+ Hard rules:
869
+ - Do NOT rewrite existing prose. Augment only — add new sections, do not
870
+ edit current ones unless explicitly asked.
871
+ - Do NOT fabricate citations. Every `(wiki: ...)` and `(source: ...)`
872
+ reference must resolve to an actual page or file.
873
+ - Do NOT exceed the human-confirmed scope. If gap analysis surfaced 5
874
+ candidate sections and the human approved 2, write only those 2.
875
+ - Do NOT touch the page's frontmatter `created:` or `client:` fields.
876
+
877
+ Report when done: sections added (with line counts), sources cited
878
+ (deduplicated list), any sections you were asked to write but skipped
879
+ because evidence was insufficient (with a one-line explanation per skip),
880
+ and the result of the post-edit `mneme validate writing-style` run.
881
+
882
+ ### 6.9 Validate a claim against the literature wiki
883
+
884
+ You are about to write or have already written a factual claim in a
885
+ deliverable (DVR, CER, technical documentation, etc.). Before the
886
+ claim ships to a notified body, it must be backed by an authoritative
887
+ source — or explicitly carry `[TO ADD REF]` so the gap is visible.
888
+
889
+ Pre-condition: the relevant literature has been ingested into the wiki
890
+ (typically under `research-questions/` or similar) and tagged with
891
+ `literature` plus an authority marker (`authority` / `non-authority`).
892
+ If those tags don't exist, run a one-time `mneme tags bulk-suggest` /
893
+ `bulk-apply` pass to add them — see Step 3 in the README.
894
+
895
+ ```
896
+ 1. Identify the claim. Reduce it to its load-bearing assertion.
897
+ "Parkinsonian tremor manifests primarily in the 4-6 Hz band" is a
898
+ claim. "Tremor is a problem" is not — too vague to validate.
899
+
900
+ 2. Search the literature for evidence. Be specific in the query:
901
+ mneme search "<claim keywords>" --client <client> -k 30
902
+ When `mneme search --tag` is available (planned), prefer:
903
+ mneme search "<claim keywords>" --client <client> --tag authority -k 20
904
+
905
+ 3. Read the top hits in full. Sort the relevant ones into three buckets:
906
+ a. AUTHORITY supports the claim (peer-reviewed, recent, on-topic)
907
+ b. NON-AUTHORITY supports the claim (preprints, blog posts, secondary)
908
+ c. Nothing relevant, or hits contradict the claim
909
+
910
+ 4. Decide based on the bucket:
911
+
912
+ a. AUTHORITY support
913
+ -> Write the claim with the citation:
914
+ "...4-6 Hz band (wiki: <client>/research-questions/.../<page>)."
915
+ -> Append the cited page to the deliverable's frontmatter
916
+ `sources:` list if not already present.
917
+
918
+ b. NON-AUTHORITY support only
919
+ -> Either soften the claim ("Preliminary reports suggest..."),
920
+ OR keep the strong form with [TO ADD REF] and find an
921
+ authority source separately.
922
+ -> Do NOT cite a non-authority source as if it were authoritative.
923
+
924
+ c. No support / contradicting evidence
925
+ -> Three options, in order of preference:
926
+ i. Drop the claim. The deliverable doesn't need it.
927
+ ii. Find a new authority source. Drop the PDF into
928
+ sources/<client>/<literature-path>/, summarize and ingest
929
+ it (run a single-page version of 6.7), then return to step 2.
930
+ iii. Keep the claim but mark it [TO ADD REF] AND open a
931
+ tracked TODO so the gap doesn't ship by accident.
932
+
933
+ 5. After resolving the claim (or marking it), run:
934
+ mneme validate writing-style <client>/<deliverable-page>
935
+ The review packet flags every remaining [TO ADD REF] and every
936
+ uncited factual claim. Address them or hand the page back to the
937
+ human reviewer with the gaps surfaced.
938
+ ```
939
+
940
+ Stop conditions: the claim is either (a) cited with an authority
941
+ source, (b) softened to match the strength of the available evidence,
942
+ (c) dropped, or (d) explicitly marked `[TO ADD REF]` AND tracked for
943
+ follow-up. Never (e) cited with fabricated or non-authoritative
944
+ evidence dressed as authoritative.
945
+
946
+ Hard rules:
947
+ - Do NOT cite a wiki page you did not read. Read every page you cite.
948
+ - Do NOT cite a non-authority source as `(wiki: ...)` without making
949
+ its non-authority status visible in the surrounding prose.
950
+ - Do NOT silently weaken or rewrite the claim to dodge the citation
951
+ requirement. If the evidence is weak, say so.
952
+ - Do NOT bulk-clear `[TO ADD REF]` markers without going through this
953
+ procedure for each one. Each marker is a discrete claim that needs
954
+ individual evidence.
955
+
956
+ Report when done: the original claim, the final form of the claim
957
+ (verbatim if changed), the citation added (or the [TO ADD REF] marker
958
+ left in place), the wiki pages read, and a one-line note on whether
959
+ this gap should be tracked for human follow-up.
960
+
961
+ ### 6.10 Pre-submission readiness check before sending to a notified body
576
962
 
577
963
  ```
578
964
  1. mneme profile show # confirm active profile
@@ -4,6 +4,71 @@ All notable changes to this project are documented here.
4
4
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
5
5
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
6
6
 
7
+ ## [0.5.2] - 2026-04-14
8
+
9
+ ### Changed
10
+
11
+ - **`ingest-dir --preserve-structure` is now the default.** The wiki now
12
+ mirrors the source directory layout unless you pass `--flat`. This avoids
13
+ silent same-basename collisions (e.g. multiple `INSTRUCTIONS.md` files from
14
+ different source directories overwriting each other). Closes suggestion #15.
15
+ - **`mneme ingest` (single-file) also mirrors by default.** When the source
16
+ lives under `sources/<client>/`, its relative position becomes a wiki
17
+ subpath automatically. Pass `--flat` to opt out.
18
+
19
+ ### Fixed
20
+
21
+ - **`mneme profile list`** now discovers profiles correctly. Previously it
22
+ filtered files by `.json` (wrong extension — profiles are markdown) and
23
+ only checked the bundled directory, which meant the shipped `eu-mdr.md`
24
+ and `iso-13485.md` profiles appeared as "No profiles found". It now unions
25
+ workspace and bundled profiles, marks each one's origin, and flags shadowed bundled profiles.
26
+ Closes the discovery bug reported in suggestion #25.
27
+
28
+ ### Added
29
+
30
+ - **`ingest-dir --flat`** — explicit opt-out for the new preserve-structure
31
+ default.
32
+ - **`ingest --flat`** — opt-out for the single-file command.
33
+ - **xlsx support is now built-in.** `openpyxl` moved from
34
+ `[project.optional-dependencies].xlsx` to `dependencies`. The `[xlsx]`
35
+ extra is kept for backwards compatibility but is no longer required.
36
+
37
+ ### Documentation
38
+
39
+ - **README**: expanded the agent end-to-end example. Step 3 now covers
40
+ bulk tagging (`tags bulk-suggest` + `bulk-apply`), Step 3b adds entity
41
+ typing (`entity suggest` + `bulk-apply`), and Step 3c walks the full
42
+ V-model trace chain (UN→REQ→DDS and RMA→REQ→DDS, terminating at code
43
+ and tests).
44
+ - **AGENTS.md**: new section 3.9 "TRACE — linking the full V-model
45
+ chain" documents the `implemented-in` / `verified-by` relationships
46
+ and the DDS-to-codebase linking agents must perform when the user
47
+ passes repositories. New task template 6.6 "Close the V-model by
48
+ linking DDS to codebase and tests" gives the exact procedure, stop
49
+ conditions, and hard rules (no fabricated paths, trace targets are
50
+ opaque strings, never embed code links in page bodies).
51
+ ## [0.5.3] - 2026-04-15
52
+
53
+ ### Documentation
54
+
55
+ - **AGENTS.md**: new task template 6.7 "Ingest a code repo into the
56
+ wiki as searchable module summaries" — the foundation for any
57
+ code-aware agent work. One wiki page per logical module, chunked
58
+ reading for large files, explicit tagging for partial/unclear pages,
59
+ and `mneme ingest-dir --flat` for the bulk write.
60
+ - **AGENTS.md**: new task template 6.8 "Augment a wiki page with
61
+ knowledge from ingested code summaries" — selective enrichment of a
62
+ target page using evidence drawn from the code summaries produced by
63
+ 6.7. Existing prose is sacred; the agent only adds new sections, in the
64
+ page's local style, with every claim cited.
65
+ - **AGENTS.md**: new task template 6.9 "Validate a claim against the
66
+ literature wiki" — the discipline an agent applies before any
67
+ factual claim ships to a notified body. Three buckets (authority /
68
+ non-authority / no support), four resolutions (cite / soften / drop /
69
+ mark `[TO ADD REF]`), zero tolerance for non-authority dressed as
70
+ authoritative.
71
+
7
72
  ## [0.5.0] - 2026-04-13
8
73
 
9
74
  ### Breaking Changes
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mneme-cli
3
- Version: 0.5.1
3
+ Version: 0.5.3
4
4
  Summary: mneme - CLI tool that turns documents into a searchable second brain. Ingest once, query forever.
5
5
  Author-email: Tolis Moustaklis <apostolos.moustaklis@gmail.com>
6
6
  License-Expression: MIT
@@ -29,13 +29,13 @@ Requires-Python: >=3.9
29
29
  Description-Content-Type: text/markdown
30
30
  License-File: LICENSE
31
31
  Requires-Dist: portalocker>=2.0.0
32
+ Requires-Dist: openpyxl>=3.1.0
32
33
  Provides-Extra: pdf
33
34
  Requires-Dist: pymupdf>=1.23.0; extra == "pdf"
34
35
  Provides-Extra: xlsx
35
36
  Requires-Dist: openpyxl>=3.1.0; extra == "xlsx"
36
37
  Provides-Extra: all
37
38
  Requires-Dist: pymupdf>=1.23.0; extra == "all"
38
- Requires-Dist: openpyxl>=3.1.0; extra == "all"
39
39
  Provides-Extra: release
40
40
  Requires-Dist: build>=1.0.0; extra == "release"
41
41
  Requires-Dist: twine>=5.0.0; extra == "release"
@@ -183,7 +183,7 @@ One installed CLI serves many projects — each workspace is just a directory.
183
183
  | `mneme stats` | Health overview |
184
184
  | `mneme repair` | Fix corrupted archives |
185
185
 
186
- **Formats:** `.md`, `.txt`, `.pdf`, `.xlsx` (with `pip install "mneme-cli[xlsx]"`)
186
+ **Formats:** `.md`, `.txt`, `.pdf`, `.xlsx` (built-in), plus `.csv` via `mneme ingest-csv`
187
187
 
188
188
  ---
189
189
 
@@ -238,38 +238,115 @@ Creates the workspace tree, sets the EU MDR writing-style profile, and initializ
238
238
  cp -r ~/Downloads/parkinson-research/* inbox/
239
239
  mneme tornado --client parkiwatch
240
240
 
241
- # Or ingest individual files
241
+ # Or ingest individual files (auto-mirrors sources/<client>/ layout into wiki/)
242
242
  mneme ingest research-paper.pdf parkiwatch
243
- mneme ingest-csv risk-register.csv parkiwatch --mapping risk-register
244
243
  mneme ingest spec-table.xlsx parkiwatch # .xlsx renders sheets as markdown tables
245
- mneme ingest-dir docs/ parkiwatch --recursive # walk subdirectories
244
+ mneme ingest-dir docs/ parkiwatch --recursive # walk subdirectories, preserve structure
245
+
246
+ # Structured CSV ingestion — one row becomes one wiki page + trace links.
247
+ # Mappings live in <workspace>/profiles/mappings/ or are auto-detected.
248
+ mneme ingest-csv user-needs.csv parkiwatch --mapping parkiwatch-user-needs
249
+ mneme ingest-csv requirements.csv parkiwatch --mapping parkiwatch-req
250
+ mneme ingest-csv design-specs.csv parkiwatch --mapping parkiwatch-dds
251
+ mneme ingest-csv risk-register.csv parkiwatch --mapping parkiwatch-rma
246
252
  ```
247
253
 
248
- What happens per ingest: source file → wiki page in `wiki/parkiwatch/` → frontmatter with auto-extracted entities → entry in `index.md` → row in the FTS5 search DB → log entry.
254
+ What happens per ingest: source file → wiki page in `wiki/parkiwatch/<mirrored-subpath>/` → frontmatter with auto-extracted proper-noun entities → entry in `index.md` → row in the FTS5 search DB → log entry. CSV ingests additionally create trace links (e.g. UN→REQ `implemented-by`, REQ→DDS `detailed-in`) in `schema/traceability.json`.
249
255
 
250
- ### Step 3 — Tag the new pages (LLM agent)
256
+ ### Step 3 — Tag many pages at once (LLM agent, bulk)
251
257
 
252
- The new pages have only the auto-applied `parkiwatch` client tag. The agent now adds meaningful tags:
258
+ New pages have only the auto-applied `parkiwatch` client tag. The agent tags them in batches:
253
259
 
254
260
  ```bash
255
- # For each new page, the agent runs:
256
- mneme tags suggest parkiwatch/research-paper > /tmp/packet.md
261
+ # 1. Pack up to 30 untagged pages into a single review packet.
262
+ # --filter scopes by wiki_path substring; omit for everything.
263
+ mneme tags bulk-suggest --filter indicators --limit 30 \
264
+ --json --out /tmp/tag-packet.json
257
265
  ```
258
266
 
259
- The packet contains the page body, the current tag taxonomy (every tag in the workspace + usage counts), and a ready-to-paste prompt. **The LLM reads the packet** — it understands the content and decides on tags, preferring existing taxonomy entries when they fit. The LLM's response is JSON:
267
+ The packet contains, for each page: wiki_path, title, current tags, body excerpt, and the existing taxonomy with usage counts. **The LLM reads the packet** and returns a response JSON:
260
268
 
261
269
  ```json
262
- {"tags": ["clinical-trial", "iso-13485"], "new_tags": ["bradykinesia-detection"]}
270
+ {
271
+ "pages": [
272
+ {"wiki_path": "parkiwatch/indicators/bda_algorithm_description.md",
273
+ "add": ["bradykinesia", "algorithm", "imu", "medical-device"]},
274
+ {"wiki_path": "parkiwatch/indicators/tremor_indicator_dataflow.md",
275
+ "add": ["tremor", "dataflow", "imu", "algorithm"]}
276
+ ]
277
+ }
263
278
  ```
264
279
 
265
- The agent then runs:
280
+ ```bash
281
+ # 2. Apply all decisions in one atomic call
282
+ mneme tags bulk-apply /tmp/tag-response.json
283
+ # → Pages updated: 9 Tags added: 42 Tags removed: 0
284
+ ```
285
+
286
+ Each application rewrites the wiki page frontmatter, updates `schema/tags.json`, re-indexes the page in FTS5, and appends a log entry. Subsequent packets reuse the growing taxonomy, so the vocabulary converges.
287
+
288
+ For single pages use `mneme tags suggest <slug>` + `mneme tags apply <slug> --add a,b,c`.
289
+
290
+ ### Step 3b — Classify entities by type (LLM agent)
291
+
292
+ Ingest auto-extracts capitalized proper nouns (e.g. "Parkiwatch", "IEC 62304") into `schema/entities.json` with `type: unknown`. Typing is an LLM judgement call, handled with the same packet workflow as tags:
266
293
 
267
294
  ```bash
268
- mneme tags apply parkiwatch/research-paper \
269
- --add clinical-trial,iso-13485,bradykinesia-detection
295
+ # 1. Build an entity-classification packet (up to 50 unclassified entities)
296
+ mneme entity suggest --client parkiwatch --limit 50 \
297
+ --json --out /tmp/entity-packet.json
298
+
299
+ # 2. LLM reads the packet and returns classifications:
300
+ # [{"id": "iec-62304", "type": "standard"},
301
+ # {"id": "notified-body", "type": "organization"},
302
+ # {"id": "bradykinesia", "type": "concept"}, ...]
303
+
304
+ # 3. Apply atomically
305
+ mneme entity bulk-apply /tmp/entity-response.json
306
+ # → Entities typed: 47 Errors: 0
307
+ ```
308
+
309
+ Supported types include `standard`, `organization`, `person`, `concept`, `technology`, `regulation`, or any custom type the profile defines. Typed entities power filtered search and the knowledge graph.
310
+
311
+ ### Step 3c — Verify the trace chain (human, on demand)
312
+
313
+ The CSV ingests in Step 2 created two trace chains that merge into a shared path. Both converge at a requirement, drill into design specs, and finally terminate at **code** and **tests** — the complete QMS traceability an auditor expects:
314
+
315
+ ```
316
+ Chain A: UN ─┐
317
+ ├─> REQ ──> DDS ──┬─> codebase (via `implemented-in`)
318
+ Chain B: RMA ┘ └─> tests (via `verified-by`)
270
319
  ```
271
320
 
272
- Atomic operation: rewrites the wiki page frontmatter, updates `schema/tags.json`, re-indexes the page in FTS5 (so search picks up the new tags immediately), appends a log entry. **Repeat for every page** the taxonomy grows, and subsequent pages tend to reuse existing tags (consistency).
321
+ Each arrow is a trace-link relationship type (`implemented-by`, `mitigated-by`, `detailed-in`, `implemented-in`, `verified-by`). The DDS→codebase link is stored as a frontmatter field on each DDS page (e.g. a git URL pointing at the implementing module). The DDS→tests link is a standard trace relationship added either by CSV ingest or by `mneme trace add`.
322
+
323
+ Walk either chain from any root page:
324
+
325
+ ```bash
326
+ # Chain A — from a user need forward to the specs that implement it
327
+ mneme trace show parkiwatch/un-001
328
+ # → UN.001 (secure sign-in)
329
+ # implemented-by -> REQ.SYS.001 (User Authentication)
330
+ # detailed-in -> DDS.CYB.001 (Strong Password Policy)
331
+ # detailed-in -> DDS.CYB.002 (Multi-Factor Authentication)
332
+ # ...
333
+
334
+ # Chain B — from a hazard forward to the specs that mitigate it
335
+ mneme trace show parkiwatch/rma-cyb-002
336
+ # → RMA.CYB.002 (Unauthorized access -- weak passwords)
337
+ # mitigated-by -> REQ.SYS.001 (User Authentication)
338
+ # detailed-in -> DDS.CYB.001, DDS.CYB.002, ...
339
+ # implemented-in -> src/auth/password_policy.py (codebase)
340
+ # verified-by -> TEST.AUTH.001 (tests)
341
+
342
+ # Trace gaps for a notified body audit
343
+ mneme trace gaps parkiwatch
344
+ # → Hazards with no mitigation: ...
345
+ # User needs with no requirements: ...
346
+
347
+ # Export the full traceability matrix for the DHF
348
+ mneme trace matrix parkiwatch --csv --out trace-matrix.csv
349
+ ```
273
350
 
274
351
  ### Step 4 — Search the knowledge base (anyone)
275
352
 
@@ -140,7 +140,7 @@ One installed CLI serves many projects — each workspace is just a directory.
140
140
  | `mneme stats` | Health overview |
141
141
  | `mneme repair` | Fix corrupted archives |
142
142
 
143
- **Formats:** `.md`, `.txt`, `.pdf`, `.xlsx` (with `pip install "mneme-cli[xlsx]"`)
143
+ **Formats:** `.md`, `.txt`, `.pdf`, `.xlsx` (built-in), plus `.csv` via `mneme ingest-csv`
144
144
 
145
145
  ---
146
146
 
@@ -195,38 +195,115 @@ Creates the workspace tree, sets the EU MDR writing-style profile, and initializ
195
195
  cp -r ~/Downloads/parkinson-research/* inbox/
196
196
  mneme tornado --client parkiwatch
197
197
 
198
- # Or ingest individual files
198
+ # Or ingest individual files (auto-mirrors sources/<client>/ layout into wiki/)
199
199
  mneme ingest research-paper.pdf parkiwatch
200
- mneme ingest-csv risk-register.csv parkiwatch --mapping risk-register
201
200
  mneme ingest spec-table.xlsx parkiwatch # .xlsx renders sheets as markdown tables
202
- mneme ingest-dir docs/ parkiwatch --recursive # walk subdirectories
201
+ mneme ingest-dir docs/ parkiwatch --recursive # walk subdirectories, preserve structure
202
+
203
+ # Structured CSV ingestion — one row becomes one wiki page + trace links.
204
+ # Mappings live in <workspace>/profiles/mappings/ or are auto-detected.
205
+ mneme ingest-csv user-needs.csv parkiwatch --mapping parkiwatch-user-needs
206
+ mneme ingest-csv requirements.csv parkiwatch --mapping parkiwatch-req
207
+ mneme ingest-csv design-specs.csv parkiwatch --mapping parkiwatch-dds
208
+ mneme ingest-csv risk-register.csv parkiwatch --mapping parkiwatch-rma
203
209
  ```
204
210
 
205
- What happens per ingest: source file → wiki page in `wiki/parkiwatch/` → frontmatter with auto-extracted entities → entry in `index.md` → row in the FTS5 search DB → log entry.
211
+ What happens per ingest: source file → wiki page in `wiki/parkiwatch/<mirrored-subpath>/` → frontmatter with auto-extracted proper-noun entities → entry in `index.md` → row in the FTS5 search DB → log entry. CSV ingests additionally create trace links (e.g. UN→REQ `implemented-by`, REQ→DDS `detailed-in`) in `schema/traceability.json`.
206
212
 
207
- ### Step 3 — Tag the new pages (LLM agent)
213
+ ### Step 3 — Tag many pages at once (LLM agent, bulk)
208
214
 
209
- The new pages have only the auto-applied `parkiwatch` client tag. The agent now adds meaningful tags:
215
+ New pages have only the auto-applied `parkiwatch` client tag. The agent tags them in batches:
210
216
 
211
217
  ```bash
212
- # For each new page, the agent runs:
213
- mneme tags suggest parkiwatch/research-paper > /tmp/packet.md
218
+ # 1. Pack up to 30 untagged pages into a single review packet.
219
+ # --filter scopes by wiki_path substring; omit for everything.
220
+ mneme tags bulk-suggest --filter indicators --limit 30 \
221
+ --json --out /tmp/tag-packet.json
214
222
  ```
215
223
 
216
- The packet contains the page body, the current tag taxonomy (every tag in the workspace + usage counts), and a ready-to-paste prompt. **The LLM reads the packet** — it understands the content and decides on tags, preferring existing taxonomy entries when they fit. The LLM's response is JSON:
224
+ The packet contains, for each page: wiki_path, title, current tags, body excerpt, and the existing taxonomy with usage counts. **The LLM reads the packet** and returns a response JSON:
217
225
 
218
226
  ```json
219
- {"tags": ["clinical-trial", "iso-13485"], "new_tags": ["bradykinesia-detection"]}
227
+ {
228
+ "pages": [
229
+ {"wiki_path": "parkiwatch/indicators/bda_algorithm_description.md",
230
+ "add": ["bradykinesia", "algorithm", "imu", "medical-device"]},
231
+ {"wiki_path": "parkiwatch/indicators/tremor_indicator_dataflow.md",
232
+ "add": ["tremor", "dataflow", "imu", "algorithm"]}
233
+ ]
234
+ }
220
235
  ```
221
236
 
222
- The agent then runs:
237
+ ```bash
238
+ # 2. Apply all decisions in one atomic call
239
+ mneme tags bulk-apply /tmp/tag-response.json
240
+ # → Pages updated: 9 Tags added: 42 Tags removed: 0
241
+ ```
242
+
243
+ Each application rewrites the wiki page frontmatter, updates `schema/tags.json`, re-indexes the page in FTS5, and appends a log entry. Subsequent packets reuse the growing taxonomy, so the vocabulary converges.
244
+
245
+ For single pages use `mneme tags suggest <slug>` + `mneme tags apply <slug> --add a,b,c`.
246
+
247
+ ### Step 3b — Classify entities by type (LLM agent)
248
+
249
+ Ingest auto-extracts capitalized proper nouns (e.g. "Parkiwatch", "IEC 62304") into `schema/entities.json` with `type: unknown`. Typing is an LLM judgement call, handled with the same packet workflow as tags:
223
250
 
224
251
  ```bash
225
- mneme tags apply parkiwatch/research-paper \
226
- --add clinical-trial,iso-13485,bradykinesia-detection
252
+ # 1. Build an entity-classification packet (up to 50 unclassified entities)
253
+ mneme entity suggest --client parkiwatch --limit 50 \
254
+ --json --out /tmp/entity-packet.json
255
+
256
+ # 2. LLM reads the packet and returns classifications:
257
+ # [{"id": "iec-62304", "type": "standard"},
258
+ # {"id": "notified-body", "type": "organization"},
259
+ # {"id": "bradykinesia", "type": "concept"}, ...]
260
+
261
+ # 3. Apply atomically
262
+ mneme entity bulk-apply /tmp/entity-response.json
263
+ # → Entities typed: 47 Errors: 0
264
+ ```
265
+
266
+ Supported types include `standard`, `organization`, `person`, `concept`, `technology`, `regulation`, or any custom type the profile defines. Typed entities power filtered search and the knowledge graph.
267
+
268
+ ### Step 3c — Verify the trace chain (human, on demand)
269
+
270
+ The CSV ingests in Step 2 created two trace chains that merge into a shared path. Both converge at a requirement, drill into design specs, and finally terminate at **code** and **tests** — the complete QMS traceability an auditor expects:
271
+
272
+ ```
273
+ Chain A: UN ─┐
274
+ ├─> REQ ──> DDS ──┬─> codebase (via `implemented-in`)
275
+ Chain B: RMA ┘ └─> tests (via `verified-by`)
227
276
  ```
228
277
 
229
- Atomic operation: rewrites the wiki page frontmatter, updates `schema/tags.json`, re-indexes the page in FTS5 (so search picks up the new tags immediately), appends a log entry. **Repeat for every page** the taxonomy grows, and subsequent pages tend to reuse existing tags (consistency).
278
+ Each arrow is a trace-link relationship type (`implemented-by`, `mitigated-by`, `detailed-in`, `implemented-in`, `verified-by`). The DDS→codebase link is stored as a frontmatter field on each DDS page (e.g. a git URL pointing at the implementing module). The DDS→tests link is a standard trace relationship added either by CSV ingest or by `mneme trace add`.
279
+
280
+ Walk either chain from any root page:
281
+
282
+ ```bash
283
+ # Chain A — from a user need forward to the specs that implement it
284
+ mneme trace show parkiwatch/un-001
285
+ # → UN.001 (secure sign-in)
286
+ # implemented-by -> REQ.SYS.001 (User Authentication)
287
+ # detailed-in -> DDS.CYB.001 (Strong Password Policy)
288
+ # detailed-in -> DDS.CYB.002 (Multi-Factor Authentication)
289
+ # ...
290
+
291
+ # Chain B — from a hazard forward to the specs that mitigate it
292
+ mneme trace show parkiwatch/rma-cyb-002
293
+ # → RMA.CYB.002 (Unauthorized access -- weak passwords)
294
+ # mitigated-by -> REQ.SYS.001 (User Authentication)
295
+ # detailed-in -> DDS.CYB.001, DDS.CYB.002, ...
296
+ # implemented-in -> src/auth/password_policy.py (codebase)
297
+ # verified-by -> TEST.AUTH.001 (tests)
298
+
299
+ # Trace gaps for a notified body audit
300
+ mneme trace gaps parkiwatch
301
+ # → Hazards with no mitigation: ...
302
+ # User needs with no requirements: ...
303
+
304
+ # Export the full traceability matrix for the DHF
305
+ mneme trace matrix parkiwatch --csv --out trace-matrix.csv
306
+ ```
230
307
 
231
308
  ### Step 4 — Search the knowledge base (anyone)
232
309
 
@@ -5,4 +5,4 @@ Public API:
5
5
  from mneme.core import ingest_source_to_both, dual_search, ...
6
6
  """
7
7
 
8
- __version__ = "0.5.1"
8
+ __version__ = "0.5.3"
@@ -565,7 +565,7 @@ def ingest_source_to_both(source_path: str, client_slug: str, force: bool = Fals
565
565
  raw_content = '\n\n'.join(sheets)
566
566
  except ImportError:
567
567
  raise ValueError(
568
- 'Excel extraction requires openpyxl. Install: pip install "mneme-cli[xlsx]"'
568
+ 'Excel extraction requires openpyxl. Install: pip install openpyxl'
569
569
  )
570
570
  else:
571
571
  # Generic text fallback
@@ -2103,7 +2103,7 @@ def lint() -> dict:
2103
2103
 
2104
2104
 
2105
2105
  def ingest_dir(directory: str, client_slug: str, force: bool = False,
2106
- recursive: bool = False, preserve_structure: bool = False) -> dict:
2106
+ recursive: bool = False, preserve_structure: bool = True) -> dict:
2107
2107
  """
2108
2108
  Batch ingest all supported files from a directory.
2109
2109
 
@@ -2112,9 +2112,10 @@ def ingest_dir(directory: str, client_slug: str, force: bool = False,
2112
2112
 
2113
2113
  When recursive=True, walks subdirectories as well.
2114
2114
 
2115
- When preserve_structure=True, each file's directory position relative to
2116
- ``directory`` becomes a wiki subdirectory under ``wiki/<client>/``. Also
2117
- naturally resolves same-basename collisions (suggestion #15).
2115
+ When preserve_structure=True (the default), each file's directory position
2116
+ relative to ``directory`` becomes a wiki subdirectory under
2117
+ ``wiki/<client>/``. Also naturally resolves same-basename collisions
2118
+ (suggestion #15). Pass preserve_structure=False for a flat wiki.
2118
2119
 
2119
2120
  Returns a summary of all ingestions.
2120
2121
  """
@@ -2154,10 +2155,18 @@ def ingest_dir(directory: str, client_slug: str, force: bool = False,
2154
2155
 
2155
2156
  for fpath in files:
2156
2157
  fname = os.path.basename(fpath)
2157
- # Compute subpath relative to the input directory when preserving structure
2158
2158
  if preserve_structure:
2159
- sub_rel = os.path.relpath(os.path.dirname(fpath), directory)
2160
- subpath = '' if sub_rel in ('', '.') else sub_rel
2159
+ # Prefer the path relative to sources/<client>/ when the input lives
2160
+ # there, so callers running `ingest-dir sources/<client>/SUBDIR` get
2161
+ # the SUBDIR prefix in the wiki tree (rather than silently flattening
2162
+ # because SUBDIR itself has no nested subdirectories). Falls back to
2163
+ # relative-to-input-directory for sources outside the canonical tree.
2164
+ auto = _auto_detect_subpath(fpath, client_slug)
2165
+ if auto:
2166
+ subpath = auto
2167
+ else:
2168
+ sub_rel = os.path.relpath(os.path.dirname(fpath), directory)
2169
+ subpath = '' if sub_rel in ('', '.') else sub_rel
2161
2170
  else:
2162
2171
  subpath = ''
2163
2172
  try:
@@ -6241,6 +6250,8 @@ def main() -> None:
6241
6250
  ingest_parser.add_argument('file', help='Path to source file (.md, .txt, .pdf)')
6242
6251
  ingest_parser.add_argument('client_slug', help='Client slug (e.g. demo-retail, my-client)')
6243
6252
  ingest_parser.add_argument('--force', action='store_true', help='Re-ingest even if source was previously ingested')
6253
+ ingest_parser.add_argument('--flat', action='store_true',
6254
+ help='Write the page directly to wiki/<client>/ without mirroring source subpath')
6244
6255
 
6245
6256
  # init
6246
6257
  init_parser = subparsers.add_parser('init', help='Initialize a new mneme workspace')
@@ -6256,8 +6267,13 @@ def main() -> None:
6256
6267
  ingest_dir_parser.add_argument('client_slug', help='Client slug (e.g. demo-retail, my-client)')
6257
6268
  ingest_dir_parser.add_argument('--force', action='store_true', help='Re-ingest even if sources were previously ingested')
6258
6269
  ingest_dir_parser.add_argument('--recursive', '-r', action='store_true', help='Recurse into subdirectories')
6259
- ingest_dir_parser.add_argument('--preserve-structure', dest='preserve_structure', action='store_true',
6260
- help='Mirror source directory structure into wiki/<client>/ subdirectories')
6270
+ # Default-on since v0.5.2: mirror source directory structure into the wiki.
6271
+ # --flat is the explicit opt-out for users who want a single-directory wiki.
6272
+ ingest_dir_parser.add_argument('--preserve-structure', dest='preserve_structure',
6273
+ action='store_true', default=True,
6274
+ help='(default) Mirror source directory structure into wiki/<client>/ subdirectories')
6275
+ ingest_dir_parser.add_argument('--flat', dest='preserve_structure', action='store_false',
6276
+ help='Write all pages to wiki/<client>/ without subdirectories')
6261
6277
 
6262
6278
  # tornado
6263
6279
  tornado_parser = subparsers.add_parser('tornado', help='Process inbox: auto-detect, ingest, archive')
@@ -6546,7 +6562,12 @@ def main() -> None:
6546
6562
  print(f'Error: invalid client slug "{args.client_slug}". Use lowercase letters, numbers, hyphens only.', file=sys.stderr)
6547
6563
  sys.exit(1)
6548
6564
  try:
6549
- result = ingest_source_to_both(args.file, args.client_slug, force=args.force)
6565
+ # Auto-mirror the source's position under sources/<client>/ into the
6566
+ # wiki, unless --flat is passed. This makes single-file ingest match
6567
+ # the default ingest-dir behavior (preserve structure) and avoids
6568
+ # same-basename collisions across different source directories.
6569
+ auto_sub = '' if args.flat else _auto_detect_subpath(args.file, args.client_slug)
6570
+ result = ingest_source_to_both(args.file, args.client_slug, force=args.force, subpath=auto_sub)
6550
6571
  if not result:
6551
6572
  # Skipped due to duplicate detection
6552
6573
  sys.exit(0)
@@ -6821,21 +6842,42 @@ def main() -> None:
6821
6842
 
6822
6843
  elif args.command == 'profile':
6823
6844
  if args.profile_command == 'list':
6824
- if os.path.exists(PROFILES_DIR):
6825
- profiles = [f[:-5] for f in os.listdir(PROFILES_DIR) if f.endswith('.json')]
6826
- active = None
6827
- if os.path.exists(ACTIVE_PROFILE_FILE):
6828
- with open(ACTIVE_PROFILE_FILE, 'r') as f:
6829
- active = f.read().strip()
6830
- if profiles:
6831
- print('Available profiles:\n')
6832
- for p in sorted(profiles):
6833
- marker = ' (active)' if p == active else ''
6834
- print(f' - {p}{marker}')
6835
- else:
6836
- print('No profiles found in profiles/ directory.')
6845
+ # Profiles are markdown files. Look in both the workspace profiles
6846
+ # directory (per-project overrides) and the bundled profiles
6847
+ # directory (shipped with mneme). Workspace entries shadow bundled
6848
+ # ones with the same name.
6849
+ workspace_profiles: dict[str, str] = {}
6850
+ bundled_profiles: dict[str, str] = {}
6851
+ if os.path.isdir(WORKSPACE_PROFILES_DIR):
6852
+ for f in os.listdir(WORKSPACE_PROFILES_DIR):
6853
+ if f.endswith('.md'):
6854
+ workspace_profiles[f[:-3]] = 'workspace'
6855
+ if os.path.isdir(PROFILES_DIR):
6856
+ for f in os.listdir(PROFILES_DIR):
6857
+ if f.endswith('.md'):
6858
+ bundled_profiles[f[:-3]] = 'bundled'
6859
+
6860
+ merged = {**bundled_profiles, **workspace_profiles} # workspace overrides
6861
+ active = None
6862
+ if os.path.exists(ACTIVE_PROFILE_FILE):
6863
+ with open(ACTIVE_PROFILE_FILE, 'r') as f:
6864
+ active = f.read().strip()
6865
+
6866
+ if merged:
6867
+ print('Available profiles:\n')
6868
+ for p in sorted(merged):
6869
+ origin = merged[p]
6870
+ shadowed = origin == 'workspace' and p in bundled_profiles
6871
+ markers = []
6872
+ if p == active:
6873
+ markers.append('active')
6874
+ markers.append(origin)
6875
+ if shadowed:
6876
+ markers.append('shadows bundled')
6877
+ print(f' - {p} [{", ".join(markers)}]')
6837
6878
  else:
6838
- print('No profiles/ directory found.')
6879
+ print('No profiles found.')
6880
+ print(f' Searched: {WORKSPACE_PROFILES_DIR} (workspace) and {PROFILES_DIR} (bundled).')
6839
6881
  elif args.profile_command == 'set':
6840
6882
  try:
6841
6883
  set_active_profile(args.name)
@@ -32,12 +32,14 @@ classifiers = [
32
32
  ]
33
33
  dependencies = [
34
34
  "portalocker>=2.0.0",
35
+ "openpyxl>=3.1.0",
35
36
  ]
36
37
 
37
38
  [project.optional-dependencies]
38
39
  pdf = ["pymupdf>=1.23.0"]
40
+ # Kept for backwards compatibility — xlsx support is now built-in.
39
41
  xlsx = ["openpyxl>=3.1.0"]
40
- all = ["pymupdf>=1.23.0", "openpyxl>=3.1.0"]
42
+ all = ["pymupdf>=1.23.0"]
41
43
  release = [
42
44
  "build>=1.0.0",
43
45
  "twine>=5.0.0",
@@ -398,7 +398,13 @@ class TestIngestDirPreserveStructure:
398
398
  with open(full, 'w') as f:
399
399
  f.write(content)
400
400
 
401
- def test_flat_default_unchanged(self, sync_workspace):
401
+ def test_preserve_structure_is_default(self, sync_workspace):
402
+ """v0.5.2+: ingest-dir mirrors source structure by default.
403
+
404
+ Previously flat by default; flipped because flat wikis silently
405
+ overwrite pages that share a basename across different source dirs
406
+ (suggestion #15).
407
+ """
402
408
  from mneme.core import ingest_dir
403
409
  self._make_source_tree(sync_workspace, {
404
410
  'demo/REQUIREMENTS/req-001.md': '# req 1',
@@ -406,9 +412,25 @@ class TestIngestDirPreserveStructure:
406
412
  })
407
413
  ingest_dir(os.path.join(sync_workspace, 'sources', 'demo'),
408
414
  'demo', recursive=True)
409
- # Without --preserve-structure, both pages flatten
415
+ # Default now mirrors source layout
416
+ assert os.path.exists(os.path.join(sync_workspace, 'wiki', 'demo', 'requirements', 'req-001.md'))
417
+ assert os.path.exists(os.path.join(sync_workspace, 'wiki', 'demo', 'design', 'dds-001.md'))
418
+ # Flat-mode pages should NOT exist
419
+ assert not os.path.exists(os.path.join(sync_workspace, 'wiki', 'demo', 'req-001.md'))
420
+ assert not os.path.exists(os.path.join(sync_workspace, 'wiki', 'demo', 'dds-001.md'))
421
+
422
+ def test_flat_opt_out_still_works(self, sync_workspace):
423
+ """Callers can still request a flat wiki with preserve_structure=False."""
424
+ from mneme.core import ingest_dir
425
+ self._make_source_tree(sync_workspace, {
426
+ 'demo/REQUIREMENTS/req-001.md': '# req 1',
427
+ 'demo/DESIGN/dds-001.md': '# dds 1',
428
+ })
429
+ ingest_dir(os.path.join(sync_workspace, 'sources', 'demo'),
430
+ 'demo', recursive=True, preserve_structure=False)
410
431
  assert os.path.exists(os.path.join(sync_workspace, 'wiki', 'demo', 'req-001.md'))
411
432
  assert os.path.exists(os.path.join(sync_workspace, 'wiki', 'demo', 'dds-001.md'))
433
+ assert not os.path.exists(os.path.join(sync_workspace, 'wiki', 'demo', 'requirements', 'req-001.md'))
412
434
 
413
435
  def test_preserve_structure_creates_subdirs(self, sync_workspace):
414
436
  from mneme.core import ingest_dir
@@ -830,6 +852,127 @@ class TestCLI:
830
852
  rc, out, err = _run_mnemo('search', 'xyznonexistent12345qqqzzz')
831
853
  assert rc == 0
832
854
 
855
+ def test_profile_list_discovers_bundled_markdown_profiles(self):
856
+ """Regression: profile list used to filter .json (wrong ext) and only
857
+ look at the bundled dir. Bundled profiles ship as .md files."""
858
+ td = tempfile.mkdtemp(prefix='mneme-profile-list-')
859
+ try:
860
+ for sub in ('wiki', 'sources', 'schema'):
861
+ os.makedirs(os.path.join(td, sub), exist_ok=True)
862
+ with open(os.path.join(td, 'index.md'), 'w') as f:
863
+ f.write('# Index\n')
864
+ with open(os.path.join(td, 'log.md'), 'w') as f:
865
+ f.write('# Log\n')
866
+ for name, empty in (
867
+ ('entities.json', {'version': 1, 'updated': '2026-01-01', 'entities': []}),
868
+ ('tags.json', {'version': 1, 'updated': '2026-01-01', 'tags': {}}),
869
+ ('graph.json', {'version': 1, 'updated': '2026-01-01', 'nodes': [], 'edges': []}),
870
+ ):
871
+ with open(os.path.join(td, 'schema', name), 'w') as f:
872
+ json.dump(empty, f)
873
+
874
+ rc, out, err = _run_mnemo('--workspace', td, 'profile', 'list')
875
+ assert rc == 0, f'profile list failed: {err}'
876
+ assert 'eu-mdr' in out, f'bundled eu-mdr not listed. got: {out}'
877
+ assert 'iso-13485' in out, f'bundled iso-13485 not listed. got: {out}'
878
+ assert 'bundled' in out, f'origin marker missing. got: {out}'
879
+ finally:
880
+ shutil.rmtree(td, ignore_errors=True)
881
+
882
+ def test_single_ingest_auto_detects_subpath_from_sources_tree(self):
883
+ """`mneme ingest` should mirror a file's position under sources/<client>/
884
+ by default (suggestion #15 — avoid silent basename collisions)."""
885
+ td = tempfile.mkdtemp(prefix='mneme-ingest-auto-')
886
+ try:
887
+ for sub in ('wiki', 'sources', 'schema'):
888
+ os.makedirs(os.path.join(td, sub), exist_ok=True)
889
+ with open(os.path.join(td, 'index.md'), 'w') as f:
890
+ f.write('# Index\n')
891
+ with open(os.path.join(td, 'log.md'), 'w') as f:
892
+ f.write('# Log\n')
893
+ for name, empty in (
894
+ ('entities.json', {'version': 1, 'updated': '2026-01-01', 'entities': []}),
895
+ ('tags.json', {'version': 1, 'updated': '2026-01-01', 'tags': {}}),
896
+ ('graph.json', {'version': 1, 'updated': '2026-01-01', 'nodes': [], 'edges': []}),
897
+ ):
898
+ with open(os.path.join(td, 'schema', name), 'w') as f:
899
+ json.dump(empty, f)
900
+
901
+ src = os.path.join(td, 'sources', 'demo', 'TRACE', 'REQ', 'req-001.md')
902
+ os.makedirs(os.path.dirname(src), exist_ok=True)
903
+ with open(src, 'w') as f:
904
+ f.write('# Req 1\n\nFiller body content for a test.\n')
905
+
906
+ rc, out, err = _run_mnemo('--workspace', td, 'ingest', src, 'demo')
907
+ assert rc == 0, f'ingest failed: {err}'
908
+ nested = os.path.join(td, 'wiki', 'demo', 'trace', 'req', 'req-001.md')
909
+ flat = os.path.join(td, 'wiki', 'demo', 'req-001.md')
910
+ assert os.path.exists(nested), f'nested page missing at {nested}'
911
+ assert not os.path.exists(flat), 'flat page should not exist by default'
912
+ finally:
913
+ shutil.rmtree(td, ignore_errors=True)
914
+
915
+ def test_single_ingest_flat_flag_opts_out(self):
916
+ """`mneme ingest --flat` keeps the old flat-wiki behavior."""
917
+ td = tempfile.mkdtemp(prefix='mneme-ingest-flat-')
918
+ try:
919
+ for sub in ('wiki', 'sources', 'schema'):
920
+ os.makedirs(os.path.join(td, sub), exist_ok=True)
921
+ with open(os.path.join(td, 'index.md'), 'w') as f:
922
+ f.write('# Index\n')
923
+ with open(os.path.join(td, 'log.md'), 'w') as f:
924
+ f.write('# Log\n')
925
+ for name, empty in (
926
+ ('entities.json', {'version': 1, 'updated': '2026-01-01', 'entities': []}),
927
+ ('tags.json', {'version': 1, 'updated': '2026-01-01', 'tags': {}}),
928
+ ('graph.json', {'version': 1, 'updated': '2026-01-01', 'nodes': [], 'edges': []}),
929
+ ):
930
+ with open(os.path.join(td, 'schema', name), 'w') as f:
931
+ json.dump(empty, f)
932
+
933
+ src = os.path.join(td, 'sources', 'demo', 'TRACE', 'req-001.md')
934
+ os.makedirs(os.path.dirname(src), exist_ok=True)
935
+ with open(src, 'w') as f:
936
+ f.write('# Req 1\n\nFiller body content for a test.\n')
937
+
938
+ rc, out, err = _run_mnemo('--workspace', td, 'ingest', src, 'demo', '--flat')
939
+ assert rc == 0, f'ingest failed: {err}'
940
+ flat = os.path.join(td, 'wiki', 'demo', 'req-001.md')
941
+ nested = os.path.join(td, 'wiki', 'demo', 'trace', 'req-001.md')
942
+ assert os.path.exists(flat)
943
+ assert not os.path.exists(nested)
944
+ finally:
945
+ shutil.rmtree(td, ignore_errors=True)
946
+
947
+ def test_profile_list_shows_workspace_override(self):
948
+ """Workspace profiles should shadow bundled ones of the same name."""
949
+ td = tempfile.mkdtemp(prefix='mneme-profile-override-')
950
+ try:
951
+ for sub in ('wiki', 'sources', 'schema', 'profiles'):
952
+ os.makedirs(os.path.join(td, sub), exist_ok=True)
953
+ with open(os.path.join(td, 'index.md'), 'w') as f:
954
+ f.write('# Index\n')
955
+ with open(os.path.join(td, 'log.md'), 'w') as f:
956
+ f.write('# Log\n')
957
+ for name, empty in (
958
+ ('entities.json', {'version': 1, 'updated': '2026-01-01', 'entities': []}),
959
+ ('tags.json', {'version': 1, 'updated': '2026-01-01', 'tags': {}}),
960
+ ('graph.json', {'version': 1, 'updated': '2026-01-01', 'nodes': [], 'edges': []}),
961
+ ):
962
+ with open(os.path.join(td, 'schema', name), 'w') as f:
963
+ json.dump(empty, f)
964
+ with open(os.path.join(td, 'profiles', 'eu-mdr.md'), 'w') as f:
965
+ f.write('---\nname: Override\ndescription: local\n---\n# Principles\n- test\n')
966
+ with open(os.path.join(td, 'profiles', 'custom.md'), 'w') as f:
967
+ f.write('---\nname: Custom\ndescription: workspace-only\n---\n# Principles\n- test\n')
968
+
969
+ rc, out, err = _run_mnemo('--workspace', td, 'profile', 'list')
970
+ assert rc == 0
971
+ assert 'custom' in out
972
+ assert 'shadows bundled' in out, f'override marker missing. got: {out}'
973
+ finally:
974
+ shutil.rmtree(td, ignore_errors=True)
975
+
833
976
  def test_ingest_missing_file_exits_nonzero(self):
834
977
  rc, out, err = _run_mnemo('ingest', '/tmp/nonexistent_file_xyz_mnemo.md', 'test')
835
978
  assert rc == 1
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes