dikw-core 0.0.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (301) hide show
  1. dikw_core-0.0.2/.env.example +38 -0
  2. dikw_core-0.0.2/.github/workflows/ci.yml +113 -0
  3. dikw_core-0.0.2/.github/workflows/release.yml +69 -0
  4. dikw_core-0.0.2/.gitignore +31 -0
  5. dikw_core-0.0.2/.python-version +1 -0
  6. dikw_core-0.0.2/AGENTS.md +161 -0
  7. dikw_core-0.0.2/CHANGELOG.md +487 -0
  8. dikw_core-0.0.2/CLAUDE.md +118 -0
  9. dikw_core-0.0.2/CONTEXT.md +89 -0
  10. dikw_core-0.0.2/INSTALL_FOR_AGENTS.md +248 -0
  11. dikw_core-0.0.2/LICENSE +21 -0
  12. dikw_core-0.0.2/PKG-INFO +282 -0
  13. dikw_core-0.0.2/README.md +244 -0
  14. dikw_core-0.0.2/docs/architecture.md +364 -0
  15. dikw_core-0.0.2/docs/converters.md +164 -0
  16. dikw_core-0.0.2/docs/design.md +679 -0
  17. dikw_core-0.0.2/docs/eval-plan.md +200 -0
  18. dikw_core-0.0.2/docs/getting-started.md +386 -0
  19. dikw_core-0.0.2/docs/lint-orphan-governance.md +185 -0
  20. dikw_core-0.0.2/docs/providers.md +640 -0
  21. dikw_core-0.0.2/docs/server.md +270 -0
  22. dikw_core-0.0.2/evals/.gitignore +8 -0
  23. dikw_core-0.0.2/evals/BASELINES.md +1915 -0
  24. dikw_core-0.0.2/evals/README.md +192 -0
  25. dikw_core-0.0.2/evals/datasets/cmteb-t2-subset/dataset.yaml +69 -0
  26. dikw_core-0.0.2/evals/datasets/mvp/corpus/karpathy-gist.md +78 -0
  27. dikw_core-0.0.2/evals/datasets/mvp/corpus/karpathy-recipe.md +103 -0
  28. dikw_core-0.0.2/evals/datasets/mvp/corpus/karpathy-software-2-0.md +66 -0
  29. dikw_core-0.0.2/evals/datasets/mvp/dataset.yaml +46 -0
  30. dikw_core-0.0.2/evals/datasets/mvp/expected.yaml +36 -0
  31. dikw_core-0.0.2/evals/datasets/mvp/queries.yaml +36 -0
  32. dikw_core-0.0.2/evals/datasets/scifact/dataset.yaml +58 -0
  33. dikw_core-0.0.2/evals/datasets/wiki-mini-mm/ATTRIBUTION.md +50 -0
  34. dikw_core-0.0.2/evals/datasets/wiki-mini-mm/corpus/eiffel_tower.md +25 -0
  35. dikw_core-0.0.2/evals/datasets/wiki-mini-mm/corpus/great_wall_of_china.md +24 -0
  36. dikw_core-0.0.2/evals/datasets/wiki-mini-mm/corpus/images/eiffel_tower.jpg +0 -0
  37. dikw_core-0.0.2/evals/datasets/wiki-mini-mm/corpus/images/great_wall_of_china.jpg +0 -0
  38. dikw_core-0.0.2/evals/datasets/wiki-mini-mm/corpus/images/lion.jpg +0 -0
  39. dikw_core-0.0.2/evals/datasets/wiki-mini-mm/corpus/images/mona_lisa.jpg +0 -0
  40. dikw_core-0.0.2/evals/datasets/wiki-mini-mm/corpus/images/mount_fuji.jpg +0 -0
  41. dikw_core-0.0.2/evals/datasets/wiki-mini-mm/corpus/images/sushi.jpg +0 -0
  42. dikw_core-0.0.2/evals/datasets/wiki-mini-mm/corpus/lion.md +25 -0
  43. dikw_core-0.0.2/evals/datasets/wiki-mini-mm/corpus/mona_lisa.md +25 -0
  44. dikw_core-0.0.2/evals/datasets/wiki-mini-mm/corpus/mount_fuji.md +24 -0
  45. dikw_core-0.0.2/evals/datasets/wiki-mini-mm/corpus/sushi.md +25 -0
  46. dikw_core-0.0.2/evals/datasets/wiki-mini-mm/dataset.yaml +6 -0
  47. dikw_core-0.0.2/evals/datasets/wiki-mini-mm/queries.yaml +163 -0
  48. dikw_core-0.0.2/evals/datasets/wiki-mini-mm/targets.yaml +86 -0
  49. dikw_core-0.0.2/evals/tools/__init__.py +8 -0
  50. dikw_core-0.0.2/evals/tools/_common.py +185 -0
  51. dikw_core-0.0.2/evals/tools/convert_beir.py +289 -0
  52. dikw_core-0.0.2/evals/tools/convert_cmteb.py +272 -0
  53. dikw_core-0.0.2/evals/tools/convert_wiki_slice.py +423 -0
  54. dikw_core-0.0.2/evals/tools/prep_cmteb_t2.py +334 -0
  55. dikw_core-0.0.2/evals/tools/probe_multimodal.py +501 -0
  56. dikw_core-0.0.2/evals/tools/run_phase15_from_snapshot.py +259 -0
  57. dikw_core-0.0.2/evals/tools/sweep_rrf.py +286 -0
  58. dikw_core-0.0.2/pyproject.toml +125 -0
  59. dikw_core-0.0.2/src/dikw_core/__init__.py +10 -0
  60. dikw_core-0.0.2/src/dikw_core/api.py +3434 -0
  61. dikw_core-0.0.2/src/dikw_core/auth_cli.py +358 -0
  62. dikw_core-0.0.2/src/dikw_core/cli.py +205 -0
  63. dikw_core-0.0.2/src/dikw_core/client/__init__.py +20 -0
  64. dikw_core-0.0.2/src/dikw_core/client/cli_app.py +1951 -0
  65. dikw_core-0.0.2/src/dikw_core/client/config.py +186 -0
  66. dikw_core-0.0.2/src/dikw_core/client/converters.py +227 -0
  67. dikw_core-0.0.2/src/dikw_core/client/importer.py +506 -0
  68. dikw_core-0.0.2/src/dikw_core/client/progress.py +823 -0
  69. dikw_core-0.0.2/src/dikw_core/client/serve_and_run.py +275 -0
  70. dikw_core-0.0.2/src/dikw_core/client/task_follow.py +191 -0
  71. dikw_core-0.0.2/src/dikw_core/client/transport.py +377 -0
  72. dikw_core-0.0.2/src/dikw_core/config.py +353 -0
  73. dikw_core-0.0.2/src/dikw_core/domains/__init__.py +0 -0
  74. dikw_core-0.0.2/src/dikw_core/domains/data/__init__.py +0 -0
  75. dikw_core-0.0.2/src/dikw_core/domains/data/assets.py +392 -0
  76. dikw_core-0.0.2/src/dikw_core/domains/data/backends/__init__.py +34 -0
  77. dikw_core-0.0.2/src/dikw_core/domains/data/backends/base.py +70 -0
  78. dikw_core-0.0.2/src/dikw_core/domains/data/backends/markdown.py +77 -0
  79. dikw_core-0.0.2/src/dikw_core/domains/data/hashing.py +20 -0
  80. dikw_core-0.0.2/src/dikw_core/domains/data/path_norm.py +56 -0
  81. dikw_core-0.0.2/src/dikw_core/domains/data/sources.py +45 -0
  82. dikw_core-0.0.2/src/dikw_core/domains/info/__init__.py +0 -0
  83. dikw_core-0.0.2/src/dikw_core/domains/info/chunk.py +215 -0
  84. dikw_core-0.0.2/src/dikw_core/domains/info/embed.py +349 -0
  85. dikw_core-0.0.2/src/dikw_core/domains/info/render.py +96 -0
  86. dikw_core-0.0.2/src/dikw_core/domains/info/search.py +723 -0
  87. dikw_core-0.0.2/src/dikw_core/domains/info/tokenize.py +182 -0
  88. dikw_core-0.0.2/src/dikw_core/domains/knowledge/__init__.py +0 -0
  89. dikw_core-0.0.2/src/dikw_core/domains/knowledge/grouping.py +173 -0
  90. dikw_core-0.0.2/src/dikw_core/domains/knowledge/indexgen.py +89 -0
  91. dikw_core-0.0.2/src/dikw_core/domains/knowledge/links.py +332 -0
  92. dikw_core-0.0.2/src/dikw_core/domains/knowledge/lint.py +325 -0
  93. dikw_core-0.0.2/src/dikw_core/domains/knowledge/lint_fix.py +991 -0
  94. dikw_core-0.0.2/src/dikw_core/domains/knowledge/lint_fixers/__init__.py +28 -0
  95. dikw_core-0.0.2/src/dikw_core/domains/knowledge/lint_fixers/broken_wikilink.py +567 -0
  96. dikw_core-0.0.2/src/dikw_core/domains/knowledge/lint_fixers/non_atomic_page.py +211 -0
  97. dikw_core-0.0.2/src/dikw_core/domains/knowledge/lint_fixers/orphan_page.py +820 -0
  98. dikw_core-0.0.2/src/dikw_core/domains/knowledge/log.py +68 -0
  99. dikw_core-0.0.2/src/dikw_core/domains/knowledge/page_index.py +137 -0
  100. dikw_core-0.0.2/src/dikw_core/domains/knowledge/synthesize.py +305 -0
  101. dikw_core-0.0.2/src/dikw_core/domains/knowledge/wiki.py +192 -0
  102. dikw_core-0.0.2/src/dikw_core/domains/wisdom/__init__.py +0 -0
  103. dikw_core-0.0.2/src/dikw_core/domains/wisdom/apply.py +98 -0
  104. dikw_core-0.0.2/src/dikw_core/domains/wisdom/distill.py +156 -0
  105. dikw_core-0.0.2/src/dikw_core/domains/wisdom/io.py +159 -0
  106. dikw_core-0.0.2/src/dikw_core/domains/wisdom/review.py +94 -0
  107. dikw_core-0.0.2/src/dikw_core/eval/__init__.py +9 -0
  108. dikw_core-0.0.2/src/dikw_core/eval/dataset.py +642 -0
  109. dikw_core-0.0.2/src/dikw_core/eval/fake_embedder.py +49 -0
  110. dikw_core-0.0.2/src/dikw_core/eval/judge.py +207 -0
  111. dikw_core-0.0.2/src/dikw_core/eval/metrics.py +504 -0
  112. dikw_core-0.0.2/src/dikw_core/eval/runner.py +1348 -0
  113. dikw_core-0.0.2/src/dikw_core/logging.py +59 -0
  114. dikw_core-0.0.2/src/dikw_core/md_inspect.py +334 -0
  115. dikw_core-0.0.2/src/dikw_core/progress.py +117 -0
  116. dikw_core-0.0.2/src/dikw_core/prompts/__init__.py +24 -0
  117. dikw_core-0.0.2/src/dikw_core/prompts/distill.md +40 -0
  118. dikw_core-0.0.2/src/dikw_core/prompts/eval_judge_synth.md +41 -0
  119. dikw_core-0.0.2/src/dikw_core/prompts/lint_fix_broken_wikilink_grounded.md +89 -0
  120. dikw_core-0.0.2/src/dikw_core/prompts/lint_fix_orphan_merge.md +67 -0
  121. dikw_core-0.0.2/src/dikw_core/prompts/synthesize.md +77 -0
  122. dikw_core-0.0.2/src/dikw_core/providers/__init__.py +112 -0
  123. dikw_core-0.0.2/src/dikw_core/providers/_http.py +40 -0
  124. dikw_core-0.0.2/src/dikw_core/providers/anthropic_compat.py +195 -0
  125. dikw_core-0.0.2/src/dikw_core/providers/base.py +127 -0
  126. dikw_core-0.0.2/src/dikw_core/providers/codex_auth.py +1210 -0
  127. dikw_core-0.0.2/src/dikw_core/providers/gitee_multimodal.py +153 -0
  128. dikw_core-0.0.2/src/dikw_core/providers/openai_codex.py +264 -0
  129. dikw_core-0.0.2/src/dikw_core/providers/openai_compat.py +241 -0
  130. dikw_core-0.0.2/src/dikw_core/schemas.py +650 -0
  131. dikw_core-0.0.2/src/dikw_core/server/__init__.py +22 -0
  132. dikw_core-0.0.2/src/dikw_core/server/_time.py +27 -0
  133. dikw_core-0.0.2/src/dikw_core/server/app.py +79 -0
  134. dikw_core-0.0.2/src/dikw_core/server/auth.py +112 -0
  135. dikw_core-0.0.2/src/dikw_core/server/errors.py +84 -0
  136. dikw_core-0.0.2/src/dikw_core/server/ingest_op.py +101 -0
  137. dikw_core-0.0.2/src/dikw_core/server/lint_op.py +130 -0
  138. dikw_core-0.0.2/src/dikw_core/server/ndjson.py +127 -0
  139. dikw_core-0.0.2/src/dikw_core/server/routes_assets.py +59 -0
  140. dikw_core-0.0.2/src/dikw_core/server/routes_graph.py +40 -0
  141. dikw_core-0.0.2/src/dikw_core/server/routes_import.py +571 -0
  142. dikw_core-0.0.2/src/dikw_core/server/routes_pages.py +84 -0
  143. dikw_core-0.0.2/src/dikw_core/server/routes_retrieve.py +202 -0
  144. dikw_core-0.0.2/src/dikw_core/server/routes_sync.py +299 -0
  145. dikw_core-0.0.2/src/dikw_core/server/routes_tasks.py +573 -0
  146. dikw_core-0.0.2/src/dikw_core/server/runtime.py +252 -0
  147. dikw_core-0.0.2/src/dikw_core/server/synth_op.py +332 -0
  148. dikw_core-0.0.2/src/dikw_core/server/tasks/__init__.py +109 -0
  149. dikw_core-0.0.2/src/dikw_core/server/tasks/events.py +101 -0
  150. dikw_core-0.0.2/src/dikw_core/server/tasks/manager.py +338 -0
  151. dikw_core-0.0.2/src/dikw_core/server/tasks/store.py +168 -0
  152. dikw_core-0.0.2/src/dikw_core/server/tasks/store_postgres.py +395 -0
  153. dikw_core-0.0.2/src/dikw_core/server/tasks/store_sqlite.py +398 -0
  154. dikw_core-0.0.2/src/dikw_core/storage/__init__.py +62 -0
  155. dikw_core-0.0.2/src/dikw_core/storage/_schema.py +49 -0
  156. dikw_core-0.0.2/src/dikw_core/storage/_vec_codec.py +24 -0
  157. dikw_core-0.0.2/src/dikw_core/storage/base.py +476 -0
  158. dikw_core-0.0.2/src/dikw_core/storage/migrations/__init__.py +0 -0
  159. dikw_core-0.0.2/src/dikw_core/storage/migrations/postgres/__init__.py +0 -0
  160. dikw_core-0.0.2/src/dikw_core/storage/migrations/postgres/schema.sql +219 -0
  161. dikw_core-0.0.2/src/dikw_core/storage/migrations/sqlite/__init__.py +0 -0
  162. dikw_core-0.0.2/src/dikw_core/storage/migrations/sqlite/schema.sql +254 -0
  163. dikw_core-0.0.2/src/dikw_core/storage/postgres.py +1592 -0
  164. dikw_core-0.0.2/src/dikw_core/storage/sqlite.py +1827 -0
  165. dikw_core-0.0.2/tests/__init__.py +0 -0
  166. dikw_core-0.0.2/tests/client/__init__.py +0 -0
  167. dikw_core-0.0.2/tests/client/test_cli_assets.py +78 -0
  168. dikw_core-0.0.2/tests/client/test_cli_e2e.py +579 -0
  169. dikw_core-0.0.2/tests/client/test_cli_graph.py +95 -0
  170. dikw_core-0.0.2/tests/client/test_cli_lint_fix.py +235 -0
  171. dikw_core-0.0.2/tests/client/test_cli_op_async.py +135 -0
  172. dikw_core-0.0.2/tests/client/test_cli_tasks.py +147 -0
  173. dikw_core-0.0.2/tests/client/test_config.py +162 -0
  174. dikw_core-0.0.2/tests/client/test_converters.py +271 -0
  175. dikw_core-0.0.2/tests/client/test_import.py +290 -0
  176. dikw_core-0.0.2/tests/client/test_import_cli.py +253 -0
  177. dikw_core-0.0.2/tests/client/test_import_with_converter.py +242 -0
  178. dikw_core-0.0.2/tests/client/test_progress.py +213 -0
  179. dikw_core-0.0.2/tests/client/test_serve_and_run.py +289 -0
  180. dikw_core-0.0.2/tests/client/test_task_follow.py +149 -0
  181. dikw_core-0.0.2/tests/client/test_transport.py +248 -0
  182. dikw_core-0.0.2/tests/conftest.py +265 -0
  183. dikw_core-0.0.2/tests/fakes.py +534 -0
  184. dikw_core-0.0.2/tests/fixtures/beir-tiny/corpus.jsonl +5 -0
  185. dikw_core-0.0.2/tests/fixtures/beir-tiny/qrels/test.tsv +7 -0
  186. dikw_core-0.0.2/tests/fixtures/beir-tiny/queries.jsonl +4 -0
  187. dikw_core-0.0.2/tests/fixtures/live-minimax-gitee.dikw.yml +29 -0
  188. dikw_core-0.0.2/tests/fixtures/notes/dikw.md +16 -0
  189. dikw_core-0.0.2/tests/fixtures/notes/karpathy-wiki.md +20 -0
  190. dikw_core-0.0.2/tests/fixtures/notes/retrieval.md +13 -0
  191. dikw_core-0.0.2/tests/server/__init__.py +0 -0
  192. dikw_core-0.0.2/tests/server/_import_helpers.py +72 -0
  193. dikw_core-0.0.2/tests/server/conftest.py +165 -0
  194. dikw_core-0.0.2/tests/server/test_auth.py +118 -0
  195. dikw_core-0.0.2/tests/server/test_eval_op.py +57 -0
  196. dikw_core-0.0.2/tests/server/test_health_route.py +242 -0
  197. dikw_core-0.0.2/tests/server/test_import.py +227 -0
  198. dikw_core-0.0.2/tests/server/test_import_packages.py +363 -0
  199. dikw_core-0.0.2/tests/server/test_ingest_task.py +192 -0
  200. dikw_core-0.0.2/tests/server/test_init_route.py +46 -0
  201. dikw_core-0.0.2/tests/server/test_lint_fix_routes.py +191 -0
  202. dikw_core-0.0.2/tests/server/test_query_route_removed.py +35 -0
  203. dikw_core-0.0.2/tests/server/test_retrieve_stream.py +316 -0
  204. dikw_core-0.0.2/tests/server/test_routes_assets.py +109 -0
  205. dikw_core-0.0.2/tests/server/test_routes_graph.py +148 -0
  206. dikw_core-0.0.2/tests/server/test_routes_page_links.py +174 -0
  207. dikw_core-0.0.2/tests/server/test_routes_pages.py +143 -0
  208. dikw_core-0.0.2/tests/server/test_routes_sync.py +423 -0
  209. dikw_core-0.0.2/tests/server/test_runtime.py +50 -0
  210. dikw_core-0.0.2/tests/server/test_synth_distill_tasks.py +220 -0
  211. dikw_core-0.0.2/tests/server/test_task_manager.py +356 -0
  212. dikw_core-0.0.2/tests/server/test_task_store_contract.py +262 -0
  213. dikw_core-0.0.2/tests/test_api_assets.py +93 -0
  214. dikw_core-0.0.2/tests/test_api_graph.py +570 -0
  215. dikw_core-0.0.2/tests/test_api_health.py +80 -0
  216. dikw_core-0.0.2/tests/test_api_links.py +271 -0
  217. dikw_core-0.0.2/tests/test_api_max_tokens_threading.py +140 -0
  218. dikw_core-0.0.2/tests/test_api_pages.py +437 -0
  219. dikw_core-0.0.2/tests/test_assets_materialize.py +557 -0
  220. dikw_core-0.0.2/tests/test_atomicity_check.py +208 -0
  221. dikw_core-0.0.2/tests/test_auth_cli.py +273 -0
  222. dikw_core-0.0.2/tests/test_backends.py +35 -0
  223. dikw_core-0.0.2/tests/test_check_command.py +394 -0
  224. dikw_core-0.0.2/tests/test_chunk_asset_spans.py +133 -0
  225. dikw_core-0.0.2/tests/test_chunker.py +93 -0
  226. dikw_core-0.0.2/tests/test_cli.py +81 -0
  227. dikw_core-0.0.2/tests/test_codex_auth.py +403 -0
  228. dikw_core-0.0.2/tests/test_codex_auth_device_flow.py +405 -0
  229. dikw_core-0.0.2/tests/test_codex_auth_migration.py +452 -0
  230. dikw_core-0.0.2/tests/test_codex_auth_refresh.py +335 -0
  231. dikw_core-0.0.2/tests/test_config.py +269 -0
  232. dikw_core-0.0.2/tests/test_convert_beir.py +215 -0
  233. dikw_core-0.0.2/tests/test_convert_cmteb.py +165 -0
  234. dikw_core-0.0.2/tests/test_distill_parser.py +85 -0
  235. dikw_core-0.0.2/tests/test_domains_layout.py +28 -0
  236. dikw_core-0.0.2/tests/test_embed_multimodal.py +292 -0
  237. dikw_core-0.0.2/tests/test_embed_perf.py +168 -0
  238. dikw_core-0.0.2/tests/test_embed_streaming.py +372 -0
  239. dikw_core-0.0.2/tests/test_embed_versioning.py +159 -0
  240. dikw_core-0.0.2/tests/test_eval_cli.py +160 -0
  241. dikw_core-0.0.2/tests/test_eval_dataset.py +650 -0
  242. dikw_core-0.0.2/tests/test_eval_judge.py +227 -0
  243. dikw_core-0.0.2/tests/test_eval_metrics.py +661 -0
  244. dikw_core-0.0.2/tests/test_eval_runner.py +1037 -0
  245. dikw_core-0.0.2/tests/test_evals_tools.py +100 -0
  246. dikw_core-0.0.2/tests/test_filename_sanitize.py +145 -0
  247. dikw_core-0.0.2/tests/test_grouping.py +182 -0
  248. dikw_core-0.0.2/tests/test_hashing.py +50 -0
  249. dikw_core-0.0.2/tests/test_indexgen_and_log.py +62 -0
  250. dikw_core-0.0.2/tests/test_ingest_and_query.py +44 -0
  251. dikw_core-0.0.2/tests/test_ingest_errors.py +248 -0
  252. dikw_core-0.0.2/tests/test_ingest_query_multimodal.py +506 -0
  253. dikw_core-0.0.2/tests/test_links.py +228 -0
  254. dikw_core-0.0.2/tests/test_lint.py +412 -0
  255. dikw_core-0.0.2/tests/test_lint_apply.py +1406 -0
  256. dikw_core-0.0.2/tests/test_lint_fixers.py +1358 -0
  257. dikw_core-0.0.2/tests/test_lint_orphan_fixer.py +1413 -0
  258. dikw_core-0.0.2/tests/test_lint_propose.py +311 -0
  259. dikw_core-0.0.2/tests/test_lint_propose_apply.py +186 -0
  260. dikw_core-0.0.2/tests/test_lint_skip_frontmatter.py +150 -0
  261. dikw_core-0.0.2/tests/test_llm_stream_event_reasoning.py +75 -0
  262. dikw_core-0.0.2/tests/test_logging.py +72 -0
  263. dikw_core-0.0.2/tests/test_markdown_backend.py +32 -0
  264. dikw_core-0.0.2/tests/test_markdown_extract_refs.py +154 -0
  265. dikw_core-0.0.2/tests/test_md_inspect.py +187 -0
  266. dikw_core-0.0.2/tests/test_multimodal_provider.py +173 -0
  267. dikw_core-0.0.2/tests/test_mvp_e2e.py +124 -0
  268. dikw_core-0.0.2/tests/test_no_filesystem_backend.py +52 -0
  269. dikw_core-0.0.2/tests/test_no_html_backend.py +34 -0
  270. dikw_core-0.0.2/tests/test_page_index.py +117 -0
  271. dikw_core-0.0.2/tests/test_path_normalization.py +167 -0
  272. dikw_core-0.0.2/tests/test_persist_wiki_page.py +184 -0
  273. dikw_core-0.0.2/tests/test_phase3_pipeline.py +186 -0
  274. dikw_core-0.0.2/tests/test_prep_cmteb_t2.py +180 -0
  275. dikw_core-0.0.2/tests/test_progress_reporter.py +287 -0
  276. dikw_core-0.0.2/tests/test_provider_anthropic_base_url.py +65 -0
  277. dikw_core-0.0.2/tests/test_provider_anthropic_retries.py +67 -0
  278. dikw_core-0.0.2/tests/test_provider_config_codex_validator.py +70 -0
  279. dikw_core-0.0.2/tests/test_provider_contract.py +453 -0
  280. dikw_core-0.0.2/tests/test_provider_openai_codex.py +522 -0
  281. dikw_core-0.0.2/tests/test_provider_openai_codex_factory.py +40 -0
  282. dikw_core-0.0.2/tests/test_provider_openai_codex_retries.py +121 -0
  283. dikw_core-0.0.2/tests/test_provider_openai_compat_base_url.py +251 -0
  284. dikw_core-0.0.2/tests/test_provider_openai_compat_retries.py +138 -0
  285. dikw_core-0.0.2/tests/test_query_asset_refs.py +291 -0
  286. dikw_core-0.0.2/tests/test_render_chunk.py +213 -0
  287. dikw_core-0.0.2/tests/test_retrieval_quality.py +21 -0
  288. dikw_core-0.0.2/tests/test_schemas.py +129 -0
  289. dikw_core-0.0.2/tests/test_search.py +1046 -0
  290. dikw_core-0.0.2/tests/test_search_graph_leg.py +300 -0
  291. dikw_core-0.0.2/tests/test_storage_contract.py +2565 -0
  292. dikw_core-0.0.2/tests/test_sweep_rrf.py +276 -0
  293. dikw_core-0.0.2/tests/test_synth_existing_pages.py +333 -0
  294. dikw_core-0.0.2/tests/test_synth_observability.py +288 -0
  295. dikw_core-0.0.2/tests/test_synth_quality.py +341 -0
  296. dikw_core-0.0.2/tests/test_synthesize_parser.py +387 -0
  297. dikw_core-0.0.2/tests/test_synthesize_pipeline.py +566 -0
  298. dikw_core-0.0.2/tests/test_tokenize.py +148 -0
  299. dikw_core-0.0.2/tests/test_wiki_page.py +70 -0
  300. dikw_core-0.0.2/tests/test_wisdom_apply.py +42 -0
  301. dikw_core-0.0.2/uv.lock +1215 -0
@@ -0,0 +1,38 @@
1
+ # Copy to `.env` (gitignored) and fill in real values.
2
+ # Loaded automatically by pytest-dotenv for tests; for CLI use
3
+ # `uv run --env-file .env dikw …` or `set -a; source .env; set +a`.
4
+ #
5
+ # This file holds SECRETS ONLY. Non-secret config (endpoint URLs, model
6
+ # names, embedding dimensions, batch sizes, display labels) lives in
7
+ # `dikw.yml`. The live e2e test reads its provider config from
8
+ # `tests/fixtures/live-minimax-gitee.dikw.yml` — edit that file (or your
9
+ # own `dikw.yml`) to point at a different vendor; do not add vendor
10
+ # prefixes to env vars here.
11
+
12
+ # ---- LLM credentials (Anthropic SDK) ----
13
+ # Consumed by the `anthropic` async SDK. When `provider.llm: anthropic`,
14
+ # set this to your Anthropic key — or to a MiniMax key, since MiniMax
15
+ # exposes an Anthropic-compatible endpoint via `provider.llm_base_url`.
16
+ ANTHROPIC_API_KEY=
17
+
18
+ # ---- LLM credentials (OpenAI SDK) ----
19
+ # Consumed by the `openai` async SDK. Only needed when
20
+ # `provider.llm: openai_compat` (OpenAI, Azure, Ollama, vLLM, DeepSeek,
21
+ # GLM, Gemini OpenAI-compat, …). Not read by the embedding leg.
22
+ OPENAI_API_KEY=
23
+
24
+ # ---- Embedding credentials ----
25
+ # Required for every embedding call. Deliberately independent from
26
+ # OPENAI_API_KEY so LLM and embedding can target different vendors
27
+ # (e.g., MiniMax LLM + Gitee AI embeddings) without cross-wiring
28
+ # credentials. No fallback — a missing value raises a clear error.
29
+ DIKW_EMBEDDING_API_KEY=
30
+
31
+ # ---- optional: Postgres storage-contract tests ----
32
+ # Leave unset to skip; set to a pgvector-enabled DSN to include them.
33
+ # DIKW_TEST_POSTGRES_DSN=postgresql://dikw:dikw@localhost:5432/dikw_test
34
+
35
+ # ---- optional: log level ----
36
+ # Controls the root logger level for `dikw …` and `dikw serve`.
37
+ # Accepts DEBUG / INFO / WARNING / ERROR / CRITICAL. Default INFO.
38
+ # DIKW_LOG_LEVEL=INFO
@@ -0,0 +1,113 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+
8
+ concurrency:
9
+ group: ${{ github.workflow }}-${{ github.ref }}
10
+ cancel-in-progress: true
11
+
12
+ jobs:
13
+ lint-type-test:
14
+ runs-on: ubuntu-latest
15
+ strategy:
16
+ fail-fast: false
17
+ matrix:
18
+ python-version: ["3.12", "3.13"]
19
+ steps:
20
+ - uses: actions/checkout@v4
21
+
22
+ - name: Install uv
23
+ uses: astral-sh/setup-uv@v3
24
+ with:
25
+ enable-cache: true
26
+
27
+ - name: Set up Python ${{ matrix.python-version }}
28
+ run: uv python install ${{ matrix.python-version }}
29
+
30
+ - name: Sync dependencies
31
+ run: uv sync --all-extras
32
+
33
+ - name: Ruff
34
+ run: uv run ruff check .
35
+
36
+ - name: Mypy
37
+ run: uv run mypy src
38
+
39
+ - name: Pytest (fast, excludes slow + perf)
40
+ run: uv run pytest -v -m "not slow and not perf"
41
+
42
+ server-e2e:
43
+ name: Server e2e (serve-and-run)
44
+ runs-on: ubuntu-latest
45
+ steps:
46
+ - uses: actions/checkout@v4
47
+
48
+ - name: Install uv
49
+ uses: astral-sh/setup-uv@v3
50
+ with:
51
+ enable-cache: true
52
+
53
+ - name: Set up Python 3.12
54
+ run: uv python install 3.12
55
+
56
+ - name: Sync dependencies
57
+ run: uv sync --all-extras
58
+
59
+ - name: Slow tests (subprocess + bind real port)
60
+ # Covers ``client/serve_and_run.py`` end-to-end: spawns the real
61
+ # ``dikw serve`` process, polls /v1/healthz, runs the inner
62
+ # client command, and asserts a clean shutdown. Splitting this
63
+ # out from the matrix job keeps the failure mode visible (a
64
+ # broken subprocess lifecycle dominates the CI report instead
65
+ # of being one row in a pytest summary).
66
+ run: uv run pytest -v -m "slow"
67
+
68
+ postgres-contract:
69
+ name: Postgres contract tests
70
+ runs-on: ubuntu-latest
71
+ services:
72
+ postgres:
73
+ image: pgvector/pgvector:pg16
74
+ env:
75
+ POSTGRES_USER: dikw
76
+ POSTGRES_PASSWORD: dikw
77
+ POSTGRES_DB: dikw
78
+ ports:
79
+ - 5432:5432
80
+ options: >-
81
+ --health-cmd "pg_isready -U dikw -d dikw"
82
+ --health-interval 5s
83
+ --health-timeout 5s
84
+ --health-retries 10
85
+ env:
86
+ DIKW_TEST_POSTGRES_DSN: postgresql://dikw:dikw@localhost:5432/dikw
87
+ steps:
88
+ - uses: actions/checkout@v4
89
+
90
+ - name: Install uv
91
+ uses: astral-sh/setup-uv@v3
92
+ with:
93
+ enable-cache: true
94
+
95
+ - name: Set up Python 3.12
96
+ run: uv python install 3.12
97
+
98
+ - name: Sync dependencies (with postgres extra)
99
+ run: uv sync --all-extras
100
+
101
+ - name: Wait for Postgres
102
+ run: |
103
+ for i in {1..30}; do
104
+ if pg_isready -h localhost -p 5432 -U dikw; then
105
+ echo "postgres is ready"; exit 0
106
+ fi
107
+ sleep 1
108
+ done
109
+ echo "postgres failed to become ready" >&2
110
+ exit 1
111
+
112
+ - name: Run storage contract tests against Postgres
113
+ run: uv run pytest -v tests/test_storage_contract.py
@@ -0,0 +1,69 @@
1
+ name: Release
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - "v*"
7
+
8
+ permissions:
9
+ contents: read
10
+ id-token: write # required for PyPI trusted publishing
11
+
12
+ jobs:
13
+ build:
14
+ name: Build sdist + wheel
15
+ runs-on: ubuntu-latest
16
+ steps:
17
+ - uses: actions/checkout@v4
18
+
19
+ - name: Install uv
20
+ uses: astral-sh/setup-uv@v3
21
+ with:
22
+ enable-cache: true
23
+
24
+ - name: Set up Python 3.12
25
+ run: uv python install 3.12
26
+
27
+ - name: Verify tag matches pyproject version
28
+ run: |
29
+ set -e
30
+ tag_version="${GITHUB_REF_NAME#v}"
31
+ pyproject_version=$(uv run --no-project python -c \
32
+ "import tomllib,sys;print(tomllib.loads(open('pyproject.toml','rb').read().decode())['project']['version'])")
33
+ if [ "$tag_version" != "$pyproject_version" ]; then
34
+ echo "tag $GITHUB_REF_NAME does not match pyproject version $pyproject_version" >&2
35
+ exit 1
36
+ fi
37
+
38
+ - name: Run test gate before releasing
39
+ run: |
40
+ uv sync --all-extras
41
+ uv run ruff check .
42
+ uv run mypy src
43
+ uv run pytest -q
44
+
45
+ - name: Build distributions
46
+ run: uv build
47
+
48
+ - name: Upload dist artifacts
49
+ uses: actions/upload-artifact@v4
50
+ with:
51
+ name: dist
52
+ path: dist/
53
+
54
+ publish:
55
+ name: Publish to PyPI (trusted publishing)
56
+ needs: build
57
+ runs-on: ubuntu-latest
58
+ environment:
59
+ name: pypi
60
+ url: https://pypi.org/p/dikw-core
61
+ steps:
62
+ - name: Download dist artifacts
63
+ uses: actions/download-artifact@v4
64
+ with:
65
+ name: dist
66
+ path: dist/
67
+
68
+ - name: Publish
69
+ uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,31 @@
1
+ __pycache__/
2
+ *.py[cod]
3
+ *.egg-info/
4
+ .pytest_cache/
5
+ .mypy_cache/
6
+ .ruff_cache/
7
+ .coverage
8
+ htmlcov/
9
+ dist/
10
+ build/
11
+ .venv/
12
+ venv/
13
+ *.sqlite
14
+ *.sqlite-journal
15
+ *.db
16
+ .env
17
+ .env.*
18
+ !.env.example
19
+ .DS_Store
20
+ .idea/
21
+ .vscode/
22
+ .claude/
23
+ # engine-managed state in example/test wikis
24
+ **/.dikw/
25
+ # parallel-development worktrees
26
+ .worktrees/
27
+ # throwaway smoke / debug tools under evals/tools — names matching _*_smoke.py
28
+ evals/tools/_*_smoke.py
29
+ # eval-snapshot cache (per-dataset ingested wikis; built on first eval run,
30
+ # reused across re-runs). Each snapshot is ~30-150 MB SQLite + corpus copy.
31
+ evals/.cache/
@@ -0,0 +1 @@
1
+ 3.12
@@ -0,0 +1,161 @@
1
+ # AGENTS.md
2
+
3
+ > Guide for AI agents that **use** dikw-core as a knowledge backend. If you're
4
+ > here to contribute code to the engine itself, see [CLAUDE.md](./CLAUDE.md)
5
+ > instead.
6
+
7
+ ## What dikw-core is, from your point of view
8
+
9
+ `dikw-core` is a Python service that turns a directory of markdown notes
10
+ (plus assets) into a queryable knowledge engine spanning the **D**ata →
11
+ **I**nformation → **K**nowledge → **W**isdom layers. The directory it points
12
+ at is called a **dikw base** — `dikw.yml` lives at its root, `sources/`
13
+ holds the raw notes, and the engine writes K-layer wiki pages and W-layer
14
+ distilled wisdom back into the same tree as plain markdown.
15
+
16
+ You talk to it over HTTP+NDJSON via a long-running `dikw serve` process.
17
+ The Typer CLI (`dikw client …`) is one client; you can write your own
18
+ agent loop with any HTTP library.
19
+
20
+ The full server spec — auth posture, NDJSON wire format, every route — is
21
+ in [`docs/server.md`](./docs/server.md).
22
+
23
+ ## Bootstrap
24
+
25
+ You probably want [`INSTALL_FOR_AGENTS.md`](./INSTALL_FOR_AGENTS.md). It
26
+ walks through install → init a base → set keys → start the server →
27
+ first retrieve call in concrete commands.
28
+
29
+ ## Endpoints you will actually use
30
+
31
+ The agent surface is intentionally small. The server is **manually
32
+ started by the human operator** — if you can't reach `GET /v1/health`,
33
+ ask the user to run `dikw serve` (don't try to start it yourself).
34
+
35
+ **dikw-core does NOT do LLM answer synthesis.** It hands you ranked
36
+ chunks + applicable wisdom + the parsed wiki tree. Composing those into
37
+ an answer is your job — you run your own LLM with your own prompt,
38
+ applying query rewrite / expansion / conversation context as you see
39
+ fit. dikw-core is stateless; agents have the context dikw-core doesn't.
40
+
41
+ | route | purpose | when to call |
42
+ | --- | --- | --- |
43
+ | `GET /v1/health` | server self-description (`base_root`, `version`, `storage_engine`, `layer_counts`, `providers`) | first call after attach — confirms the server is up and which base it's bound to |
44
+ | `POST /v1/retrieve` | retrieval-only NDJSON (chunks + page refs, no LLM call) | knowledge access — feed the chunks into your own LLM prompt |
45
+ | `GET /v1/base/pages` | list pages registered in the base, optional `?layer=` filter | discovering page paths to read |
46
+ | `GET /v1/base/pages/{path}` | full page body + chunk anchors aligned to the parsed coordinate space | reading a specific page after a retrieval hit lands you on it |
47
+ | `GET /v1/base/pages/{path}/links` | K-layer link neighbours of a page — `outgoing[]` (edges from this page) + `incoming[]` (edges to this page), optional `?direction=in\|out\|both` and `?limit=N`. Every returned edge resolves to an active document (bare URLs and deactivated dsts are filtered), so you can always feed `dst_path` / `src_path` back into `/v1/base/pages/{path}` | walking the wiki graph one hop without re-parsing `[[wikilink]]` syntax |
48
+ | `GET /v1/base/graph` | whole-base graph in one read: `nodes[]`, `edges[]`, `unresolved[]`, `stats{}`, plus a `base_revision` content hash for cheap caching. Optional `?active=true\|false`. | global graph view (knowledge-graph UI, connectivity analysis) without N requests |
49
+ | `GET /v1/assets/{asset_id}` | stream the raw bytes of a content-addressed asset (sha256 hex id). Immutable; `ETag` + `Cache-Control: public, max-age=31536000, immutable`. Look up the id via the `assets[]` array on `GET /v1/base/pages/{path}` | rendering images embedded in a page response, or piping a binary into another tool |
50
+ | `POST /v1/ingest` | ingest whatever is on disk under `<base>/sources/` (loaded there by `POST /v1/import` or by the user dropping files in) | when the user adds/edits markdown and wants the index refreshed |
51
+ | `GET /v1/status`, `POST /v1/lint`, `POST /v1/check` | counts, lint issues, provider connectivity | sanity checks the user may ask for |
52
+
53
+ CLI equivalents — all `dikw client` commands default to JSON output
54
+ suitable for piping into `jq` or an agent loop. Human-readable rendering
55
+ requires opting in via `--format table`:
56
+
57
+ ```
58
+ dikw client health # JSON by default
59
+ dikw client retrieve "your question" --plain # pipe-safe final JSON (chunks + page_refs)
60
+ dikw client pages list # JSON by default
61
+ dikw client pages get sources/notes/alpha.md # JSON
62
+ dikw client pages links wiki/Foo.md # JSON: {outgoing, incoming}
63
+ dikw client graph get # JSON: full base graph in one call
64
+ dikw client assets get <asset_id> --output f # streams bytes to a local file; metadata to stdout JSON
65
+ dikw client import ./local-sources # pre-flights + imports md packages
66
+ dikw client ingest # async-default: prints task-handle JSON, exits 0
67
+ dikw client ingest --wait # rendered progress + report; succeeded=0 / failed=1 / cancelled=130
68
+ dikw client tasks wait <task_id> # block + render on an existing task_id (same exit-code map)
69
+ dikw client tasks events <task_id> # raw EventsPage JSON for a single cursor page
70
+ ```
71
+
72
+ `retrieve` consumes the server's NDJSON event stream for you and emits the
73
+ final JSON payload (chunks + page_refs) to stdout. Pass `--plain`
74
+ whenever you pipe, otherwise the "retrieving…" rich banner lands on
75
+ stdout and breaks `jq`. The `--format json` and `--plain` toggles are
76
+ orthogonal: `--format` picks the *final* shape, `--plain` suppresses
77
+ the *intermediate* status. If you want the raw NDJSON event stream,
78
+ talk to `POST /v1/retrieve` over HTTP directly.
79
+
80
+ ## A typical retrieval-augmented loop
81
+
82
+ 1. `GET /v1/health` — confirm the server is up and grab `base_root` so you
83
+ know which base the user pointed it at.
84
+ 2. `POST /v1/retrieve` with the question + a `limit`. Each chunk hit
85
+ carries a `path`, `layer`, `anchor`, `start_off`/`end_off`, and the
86
+ full chunk `text` (on both the intermediate `retrieval_done` partial
87
+ *and* `final.result.chunks`); the response also carries `page_refs`
88
+ listing the parent pages. A streaming agent can prompt off the partial without waiting
89
+ for `final`.
90
+ 3. If you want full pages instead of just chunks, follow the page refs
91
+ with `GET /v1/base/pages/{path}` — that returns the parsed body plus
92
+ anchors so you can re-locate every chunk hit inside the page body.
93
+ 4. To expand context across the wiki graph, call
94
+ `GET /v1/base/pages/{path}/links` on hit pages. The response is
95
+ `{outgoing[{dst_path, link_type, line, anchor}], incoming[{src_doc_id,
96
+ src_path, link_type, line, anchor}]}` — each edge is a hop you can
97
+ immediately re-feed into `GET /v1/base/pages/{path}` without scanning
98
+ wiki bodies for `[[wikilinks]]`.
99
+ 5. Feed the chunks (and any neighbour pages you pulled) into your own
100
+ LLM prompt and produce the final answer client-side. dikw-core does
101
+ not own the synthesis step.
102
+
103
+ ## Things that will trip you up
104
+
105
+ - **Server lifecycle.** `dikw serve` is started manually by the user, not
106
+ by the agent. If you can't connect, surface that to the user — do not
107
+ spawn it yourself.
108
+ - **Auth.** Loopback binds (the default) require no auth. Non-loopback hosts require a
109
+ `DIKW_SERVER_TOKEN` bearer token. The server-bound config is reflected in
110
+ `GET /v1/info`'s `auth_required` field; `/v1/health` is the rich
111
+ bootstrap probe and intentionally omits auth state.
112
+ - **The "base" terminology.** When the docs or CLI say "base", they mean
113
+ the whole bound directory (which contains `sources/`, `wiki/`,
114
+ `wisdom/`, `.dikw/`, `dikw.yml`). The K-layer subdirectory is still
115
+ called `wiki/` on disk — that's intentional, since the user opens it
116
+ in Obsidian. Don't confuse "the dikw base" with "the wiki/ folder
117
+ inside it".
118
+ - **`/v1/base/pages/{path}` is index-driven.** Only paths registered as
119
+ `DocumentRecord` rows resolve. If a markdown file exists on disk but
120
+ hasn't been ingested, the route returns 404. Use `GET /v1/base/pages`
121
+ to enumerate what's actually queryable.
122
+ - **NDJSON streaming vs cursor JSON.** `POST /v1/retrieve` streams
123
+ NDJSON directly on its response body (`type=retrieve_started →
124
+ retrieval_done → final`). The async-task ops
125
+ (`POST /v1/{ingest,synth,distill,eval,lint.propose,lint.apply}`)
126
+ instead return a JSON `TaskHandle` (`{"task_id": "..."}`); follow
127
+ the task by polling **`GET /v1/tasks/{task_id}/events?from_seq=N&limit=M&wait=K`**.
128
+ Each response is one `EventsPage` (cursor JSON, server long-poll up
129
+ to `wait` seconds, capped at 60s) — agents advance via the returned
130
+ `next_from_seq` and stop when `task_status` is terminal AND the
131
+ `final` event has appeared in a returned page. The `final` event
132
+ itself carries the result/error payload. There is no SSE.
133
+ - **CLI mirrors the wire contract.** Op commands default to async:
134
+ `dikw client ingest` submits + prints `{task_id, status, events_url,
135
+ wait_command}` and exits 0. Pass `--wait` to block + render + map
136
+ the final status to the standard exit code (succeeded=0, failed=1,
137
+ cancelled=130, client-side timeout=124). Use
138
+ `dikw client tasks events <id>` for raw cursor pages, and
139
+ `dikw client tasks wait <id>` for the same blocking UX applied to
140
+ an existing task_id.
141
+ - **Per-file ingest errors are non-fatal.** A bad markdown file produces
142
+ one `partial` event with `kind=file_error` and lands on
143
+ `IngestReport.errors`, but the run continues. CLI users can pass
144
+ `--strict` to flip this to a hard fail.
145
+
146
+ ## Pointers
147
+
148
+ - [`docs/architecture.md`](./docs/architecture.md) — module map, layer
149
+ contracts, named seams. Worth reading if you're going to do anything
150
+ more than basic retrieval.
151
+ - [`docs/design.md`](./docs/design.md) — approved design doc. Source of
152
+ truth for *intent*; supersedes anything inferred from code shape.
153
+ - [`docs/getting-started.md`](./docs/getting-started.md) — end-user
154
+ walkthrough. Useful as a script when you're guiding a user through
155
+ setup.
156
+ - [`docs/providers.md`](./docs/providers.md) — per-vendor config cookbook
157
+ (MiniMax + Gitee, OpenAI, DeepSeek, Ollama, GLM, Gemini-compat, …) and
158
+ production gotchas around batch size, dim locking, retry, prompt
159
+ caching.
160
+ - [`docs/server.md`](./docs/server.md) — full HTTP wire spec, security
161
+ posture, deployment notes.