biblicus 0.2.0__tar.gz → 0.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {biblicus-0.2.0 → biblicus-0.4.0}/MANIFEST.in +2 -0
- {biblicus-0.2.0 → biblicus-0.4.0}/PKG-INFO +96 -18
- biblicus-0.2.0/src/biblicus.egg-info/PKG-INFO → biblicus-0.4.0/README.md +86 -37
- biblicus-0.4.0/THIRD_PARTY_NOTICES.md +36 -0
- {biblicus-0.2.0 → biblicus-0.4.0}/docs/CORPUS.md +14 -1
- biblicus-0.2.0/docs/CORPUS_WORKFLOWS.md → biblicus-0.4.0/docs/CORPUS_DESIGN.md +2 -6
- biblicus-0.4.0/docs/DEMOS.md +374 -0
- biblicus-0.4.0/docs/EXTRACTION.md +203 -0
- biblicus-0.4.0/docs/FEATURE_INDEX.md +228 -0
- biblicus-0.4.0/docs/ROADMAP.md +200 -0
- biblicus-0.4.0/docs/TESTING.md +53 -0
- biblicus-0.4.0/docs/USER_CONFIGURATION.md +36 -0
- biblicus-0.4.0/docs/conf.py +55 -0
- {biblicus-0.2.0 → biblicus-0.4.0}/docs/index.rst +9 -3
- {biblicus-0.2.0 → biblicus-0.4.0}/features/cli_parsing.feature +5 -0
- {biblicus-0.2.0 → biblicus-0.4.0}/features/content_sniffing.feature +48 -0
- biblicus-0.4.0/features/crawl.feature +81 -0
- {biblicus-0.2.0 → biblicus-0.4.0}/features/environment.py +71 -0
- biblicus-0.4.0/features/extraction_error_handling.feature +32 -0
- biblicus-0.4.0/features/extraction_run_lifecycle.feature +117 -0
- biblicus-0.4.0/features/extraction_selection.feature +72 -0
- biblicus-0.4.0/features/extraction_selection_longest.feature +66 -0
- biblicus-0.4.0/features/extractor_pipeline.feature +105 -0
- biblicus-0.4.0/features/integration_audio_samples.feature +13 -0
- biblicus-0.4.0/features/integration_image_samples.feature +11 -0
- biblicus-0.4.0/features/integration_mixed_corpus.feature +15 -0
- biblicus-0.4.0/features/integration_mixed_extraction.feature +15 -0
- biblicus-0.4.0/features/integration_ocr_image_extraction.feature +11 -0
- biblicus-0.4.0/features/integration_pdf_retrieval.feature +20 -0
- biblicus-0.4.0/features/integration_unstructured_extraction.feature +11 -0
- biblicus-0.4.0/features/ocr_extractor.feature +61 -0
- biblicus-0.4.0/features/pdf_text_extraction.feature +41 -0
- {biblicus-0.2.0 → biblicus-0.4.0}/features/retrieval_uses_extraction_run.feature +17 -0
- {biblicus-0.2.0 → biblicus-0.4.0}/features/steps/backend_steps.py +3 -1
- {biblicus-0.2.0 → biblicus-0.4.0}/features/steps/cli_parsing_steps.py +20 -1
- {biblicus-0.2.0 → biblicus-0.4.0}/features/steps/cli_steps.py +67 -14
- biblicus-0.4.0/features/steps/crawl_steps.py +68 -0
- biblicus-0.4.0/features/steps/extraction_run_lifecycle_steps.py +148 -0
- biblicus-0.4.0/features/steps/extraction_steps.py +511 -0
- biblicus-0.4.0/features/steps/extractor_steps.py +97 -0
- {biblicus-0.2.0 → biblicus-0.4.0}/features/steps/frontmatter_steps.py +5 -2
- biblicus-0.4.0/features/steps/openai_steps.py +236 -0
- biblicus-0.4.0/features/steps/pdf_steps.py +115 -0
- {biblicus-0.2.0 → biblicus-0.4.0}/features/steps/python_api_steps.py +16 -14
- biblicus-0.4.0/features/steps/rapidocr_steps.py +145 -0
- {biblicus-0.2.0 → biblicus-0.4.0}/features/steps/retrieval_steps.py +68 -4
- biblicus-0.4.0/features/steps/stt_steps.py +93 -0
- biblicus-0.4.0/features/steps/unstructured_steps.py +143 -0
- biblicus-0.4.0/features/steps/user_config_steps.py +47 -0
- biblicus-0.4.0/features/stt_extractor.feature +139 -0
- {biblicus-0.2.0 → biblicus-0.4.0}/features/text_extraction_runs.feature +7 -7
- biblicus-0.4.0/features/unstructured_extractor.feature +62 -0
- biblicus-0.4.0/features/user_config.feature +39 -0
- {biblicus-0.2.0 → biblicus-0.4.0}/pyproject.toml +38 -3
- biblicus-0.4.0/scripts/download_audio_samples.py +200 -0
- biblicus-0.4.0/scripts/download_image_samples.py +180 -0
- biblicus-0.4.0/scripts/download_mixed_samples.py +239 -0
- {biblicus-0.2.0 → biblicus-0.4.0}/scripts/download_pdf_samples.py +6 -3
- {biblicus-0.2.0 → biblicus-0.4.0}/scripts/download_wikipedia.py +3 -1
- {biblicus-0.2.0 → biblicus-0.4.0}/scripts/test.py +24 -3
- {biblicus-0.2.0 → biblicus-0.4.0}/src/biblicus/__init__.py +2 -2
- biblicus-0.4.0/src/biblicus/_vendor/dotyaml/__init__.py +14 -0
- biblicus-0.4.0/src/biblicus/_vendor/dotyaml/interpolation.py +63 -0
- biblicus-0.4.0/src/biblicus/_vendor/dotyaml/loader.py +181 -0
- biblicus-0.4.0/src/biblicus/_vendor/dotyaml/transformer.py +135 -0
- {biblicus-0.2.0 → biblicus-0.4.0}/src/biblicus/backends/__init__.py +0 -2
- {biblicus-0.2.0 → biblicus-0.4.0}/src/biblicus/backends/base.py +3 -3
- {biblicus-0.2.0 → biblicus-0.4.0}/src/biblicus/backends/scan.py +21 -15
- {biblicus-0.2.0 → biblicus-0.4.0}/src/biblicus/backends/sqlite_full_text_search.py +14 -15
- {biblicus-0.2.0 → biblicus-0.4.0}/src/biblicus/cli.py +177 -53
- {biblicus-0.2.0 → biblicus-0.4.0}/src/biblicus/corpus.py +209 -59
- biblicus-0.4.0/src/biblicus/crawl.py +186 -0
- biblicus-0.4.0/src/biblicus/errors.py +15 -0
- {biblicus-0.2.0 → biblicus-0.4.0}/src/biblicus/evaluation.py +4 -8
- biblicus-0.4.0/src/biblicus/extraction.py +531 -0
- {biblicus-0.2.0 → biblicus-0.4.0}/src/biblicus/extractors/__init__.py +14 -3
- {biblicus-0.2.0 → biblicus-0.4.0}/src/biblicus/extractors/base.py +12 -5
- {biblicus-0.2.0 → biblicus-0.4.0}/src/biblicus/extractors/metadata_text.py +13 -5
- biblicus-0.4.0/src/biblicus/extractors/openai_stt.py +180 -0
- {biblicus-0.2.0 → biblicus-0.4.0}/src/biblicus/extractors/pass_through_text.py +16 -6
- biblicus-0.4.0/src/biblicus/extractors/pdf_text.py +100 -0
- biblicus-0.4.0/src/biblicus/extractors/pipeline.py +105 -0
- biblicus-0.4.0/src/biblicus/extractors/rapidocr_text.py +129 -0
- biblicus-0.4.0/src/biblicus/extractors/select_longest_text.py +105 -0
- biblicus-0.4.0/src/biblicus/extractors/select_text.py +100 -0
- biblicus-0.4.0/src/biblicus/extractors/unstructured_text.py +100 -0
- {biblicus-0.2.0 → biblicus-0.4.0}/src/biblicus/frontmatter.py +0 -3
- {biblicus-0.2.0 → biblicus-0.4.0}/src/biblicus/hook_logging.py +0 -5
- {biblicus-0.2.0 → biblicus-0.4.0}/src/biblicus/hook_manager.py +3 -5
- {biblicus-0.2.0 → biblicus-0.4.0}/src/biblicus/hooks.py +3 -7
- {biblicus-0.2.0 → biblicus-0.4.0}/src/biblicus/ignore.py +0 -3
- {biblicus-0.2.0 → biblicus-0.4.0}/src/biblicus/models.py +118 -0
- {biblicus-0.2.0 → biblicus-0.4.0}/src/biblicus/retrieval.py +0 -4
- {biblicus-0.2.0 → biblicus-0.4.0}/src/biblicus/sources.py +44 -9
- {biblicus-0.2.0 → biblicus-0.4.0}/src/biblicus/time.py +1 -2
- {biblicus-0.2.0 → biblicus-0.4.0}/src/biblicus/uris.py +3 -4
- biblicus-0.4.0/src/biblicus/user_config.py +138 -0
- biblicus-0.2.0/README.md → biblicus-0.4.0/src/biblicus.egg-info/PKG-INFO +115 -17
- {biblicus-0.2.0 → biblicus-0.4.0}/src/biblicus.egg-info/SOURCES.txt +49 -4
- {biblicus-0.2.0 → biblicus-0.4.0}/src/biblicus.egg-info/requires.txt +12 -0
- biblicus-0.2.0/docs/EXTRACTION.md +0 -86
- biblicus-0.2.0/docs/NEXT_STEPS.md +0 -309
- biblicus-0.2.0/docs/TESTING.md +0 -29
- biblicus-0.2.0/docs/conf.py +0 -31
- biblicus-0.2.0/features/extractor_pipeline.feature +0 -114
- biblicus-0.2.0/features/steps/extraction_steps.py +0 -238
- biblicus-0.2.0/features/steps/extractor_steps.py +0 -54
- biblicus-0.2.0/src/biblicus/extraction.py +0 -330
- biblicus-0.2.0/src/biblicus/extractors/cascade.py +0 -101
- {biblicus-0.2.0 → biblicus-0.4.0}/LICENSE +0 -0
- {biblicus-0.2.0 → biblicus-0.4.0}/datasets/wikipedia_mini.json +0 -0
- {biblicus-0.2.0 → biblicus-0.4.0}/docs/ARCHITECTURE.md +0 -0
- {biblicus-0.2.0 → biblicus-0.4.0}/docs/BACKENDS.md +0 -0
- {biblicus-0.2.0 → biblicus-0.4.0}/docs/api.rst +0 -0
- {biblicus-0.2.0 → biblicus-0.4.0}/features/backend_validation.feature +0 -0
- {biblicus-0.2.0 → biblicus-0.4.0}/features/biblicus_corpus.feature +0 -0
- {biblicus-0.2.0 → biblicus-0.4.0}/features/cli_entrypoint.feature +0 -0
- {biblicus-0.2.0 → biblicus-0.4.0}/features/corpus_edge_cases.feature +0 -0
- {biblicus-0.2.0 → biblicus-0.4.0}/features/corpus_identity.feature +0 -0
- {biblicus-0.2.0 → biblicus-0.4.0}/features/corpus_purge.feature +0 -0
- {biblicus-0.2.0 → biblicus-0.4.0}/features/error_cases.feature +0 -0
- {biblicus-0.2.0 → biblicus-0.4.0}/features/evaluation.feature +0 -0
- {biblicus-0.2.0 → biblicus-0.4.0}/features/extractor_validation.feature +0 -0
- {biblicus-0.2.0 → biblicus-0.4.0}/features/frontmatter.feature +0 -0
- {biblicus-0.2.0 → biblicus-0.4.0}/features/hook_config_validation.feature +0 -0
- {biblicus-0.2.0 → biblicus-0.4.0}/features/hook_error_handling.feature +0 -0
- {biblicus-0.2.0 → biblicus-0.4.0}/features/import_tree.feature +0 -0
- {biblicus-0.2.0 → biblicus-0.4.0}/features/ingest_sources.feature +0 -0
- {biblicus-0.2.0 → biblicus-0.4.0}/features/integration_pdf_samples.feature +0 -0
- {biblicus-0.2.0 → biblicus-0.4.0}/features/integration_wikipedia.feature +0 -0
- {biblicus-0.2.0 → biblicus-0.4.0}/features/lifecycle_hooks.feature +0 -0
- {biblicus-0.2.0 → biblicus-0.4.0}/features/model_validation.feature +0 -0
- {biblicus-0.2.0 → biblicus-0.4.0}/features/python_api.feature +0 -0
- {biblicus-0.2.0 → biblicus-0.4.0}/features/python_hook_logging.feature +0 -0
- {biblicus-0.2.0 → biblicus-0.4.0}/features/retrieval_budget.feature +0 -0
- {biblicus-0.2.0 → biblicus-0.4.0}/features/retrieval_scan.feature +0 -0
- {biblicus-0.2.0 → biblicus-0.4.0}/features/retrieval_sqlite_full_text_search.feature +0 -0
- {biblicus-0.2.0 → biblicus-0.4.0}/features/retrieval_utilities.feature +0 -0
- {biblicus-0.2.0 → biblicus-0.4.0}/features/source_loading.feature +0 -0
- {biblicus-0.2.0 → biblicus-0.4.0}/features/steps/model_steps.py +0 -0
- {biblicus-0.2.0 → biblicus-0.4.0}/features/streaming_ingest.feature +0 -0
- {biblicus-0.2.0 → biblicus-0.4.0}/setup.cfg +0 -0
- {biblicus-0.2.0 → biblicus-0.4.0}/src/biblicus/__main__.py +0 -0
- {biblicus-0.2.0 → biblicus-0.4.0}/src/biblicus/constants.py +0 -0
- {biblicus-0.2.0 → biblicus-0.4.0}/src/biblicus.egg-info/dependency_links.txt +0 -0
- {biblicus-0.2.0 → biblicus-0.4.0}/src/biblicus.egg-info/entry_points.txt +0 -0
- {biblicus-0.2.0 → biblicus-0.4.0}/src/biblicus.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: biblicus
|
|
3
|
-
Version: 0.2.0
|
|
3
|
+
Version: 0.4.0
|
|
4
4
|
Summary: Command line interface and Python library for corpus ingestion, retrieval, and evaluation.
|
|
5
5
|
License: MIT
|
|
6
6
|
Requires-Python: >=3.9
|
|
@@ -8,20 +8,30 @@ Description-Content-Type: text/markdown
|
|
|
8
8
|
License-File: LICENSE
|
|
9
9
|
Requires-Dist: pydantic>=2.0
|
|
10
10
|
Requires-Dist: PyYAML>=6.0
|
|
11
|
+
Requires-Dist: pypdf>=4.0
|
|
11
12
|
Provides-Extra: dev
|
|
12
13
|
Requires-Dist: behave>=1.2.6; extra == "dev"
|
|
13
14
|
Requires-Dist: coverage[toml]>=7.0; extra == "dev"
|
|
14
15
|
Requires-Dist: sphinx>=7.0; extra == "dev"
|
|
15
16
|
Requires-Dist: myst-parser>=2.0; extra == "dev"
|
|
17
|
+
Requires-Dist: sphinx_rtd_theme>=2.0; extra == "dev"
|
|
16
18
|
Requires-Dist: ruff>=0.4.0; extra == "dev"
|
|
17
19
|
Requires-Dist: black>=24.0; extra == "dev"
|
|
18
20
|
Requires-Dist: python-semantic-release>=9.0.0; extra == "dev"
|
|
21
|
+
Provides-Extra: openai
|
|
22
|
+
Requires-Dist: openai>=1.0; extra == "openai"
|
|
23
|
+
Provides-Extra: unstructured
|
|
24
|
+
Requires-Dist: unstructured>=0.12.0; extra == "unstructured"
|
|
25
|
+
Requires-Dist: python-docx>=1.1.0; extra == "unstructured"
|
|
26
|
+
Provides-Extra: ocr
|
|
27
|
+
Requires-Dist: rapidocr-onnxruntime>=1.3.0; extra == "ocr"
|
|
19
28
|
Dynamic: license-file
|
|
20
29
|
|
|
21
30
|
# Biblicus
|
|
22
31
|
|
|
23
32
|
![Continuous integration][continuous-integration-badge]
|
|
24
33
|
![Coverage][coverage-badge]
|
|
34
|
+
![Documentation][documentation-badge]
|
|
25
35
|
|
|
26
36
|
Make your documents usable by your assistant, then decide later how you will search and retrieve them.
|
|
27
37
|
|
|
@@ -31,28 +41,34 @@ The first practical problem is not retrieval. It is collection and care. You nee
|
|
|
31
41
|
|
|
32
42
|
This library gives you a corpus, which is a normal folder on disk. It stores each ingested item as a file, with optional metadata stored next to it. You can open and inspect the raw files directly. Any derived catalog or index can be rebuilt from the raw corpus.
|
|
33
43
|
|
|
34
|
-
It
|
|
44
|
+
It can be used alongside LangChain, Tactus, Pydantic AI, or the agent development kit. Use it from Python or from the command line interface.
|
|
35
45
|
|
|
36
46
|
See [retrieval augmented generation overview] for a short introduction to the idea.
|
|
37
47
|
|
|
38
|
-
##
|
|
48
|
+
## A beginner friendly mental model
|
|
39
49
|
|
|
40
|
-
|
|
50
|
+
Think in three stages.
|
|
51
|
+
|
|
52
|
+
- Ingest puts raw items into a corpus. This is file-first and human-inspectable.
|
|
53
|
+
- Extract turns items into usable text. This is where you would do text extraction from Portable Document Format files, optical character recognition for images, or speech to text for audio. If an item is already text, extraction can simply read it. Extraction outputs are derived artifacts, not edits to the raw files.
|
|
54
|
+
- Retrieve searches extracted text and returns evidence. Evidence is structured so you can turn it into context for your model call in whatever way your project prefers.
|
|
55
|
+
|
|
56
|
+
If you learn a few project words, the rest of the system becomes predictable.
|
|
41
57
|
|
|
42
58
|
- Corpus is the folder that holds raw items and their metadata.
|
|
43
|
-
- Item is the raw bytes
|
|
59
|
+
- Item is the raw bytes plus optional metadata and source information.
|
|
44
60
|
- Catalog is the rebuildable index of the corpus.
|
|
45
|
-
-
|
|
46
|
-
- Run is a recorded retrieval build for a corpus.
|
|
61
|
+
- Extraction run is a recorded extraction build that produces text artifacts.
|
|
47
62
|
- Backend is a pluggable retrieval implementation.
|
|
48
|
-
-
|
|
49
|
-
-
|
|
63
|
+
- Run is a recorded retrieval build for a corpus.
|
|
64
|
+
- Evidence is what retrieval returns, with identifiers and source information.
|
|
50
65
|
|
|
51
66
|
## Diagram
|
|
52
67
|
|
|
53
68
|
This diagram shows how a corpus becomes evidence for an assistant.
|
|
54
|
-
|
|
55
|
-
The
|
|
69
|
+
Extraction is introduced here as a separate stage so you can swap extraction approaches without changing the raw corpus.
|
|
70
|
+
The legend shows what the block styles mean.
|
|
71
|
+
Your code is where you decide how to turn evidence into context and how to call a model.
|
|
56
72
|
|
|
57
73
|
```mermaid
|
|
58
74
|
%%{init: {"flowchart": {"useMaxWidth": true, "nodeSpacing": 18, "rankSpacing": 22}}}%%
|
|
@@ -74,12 +90,19 @@ flowchart LR
|
|
|
74
90
|
Raw --> Catalog[Catalog file]
|
|
75
91
|
end
|
|
76
92
|
|
|
77
|
-
subgraph
|
|
93
|
+
subgraph PluggableExtractionPipeline[Pluggable: extraction pipeline]
|
|
94
|
+
direction TB
|
|
95
|
+
Catalog --> Extract[Extract pipeline]
|
|
96
|
+
Extract --> ExtractedText[Extracted text artifacts]
|
|
97
|
+
ExtractedText --> ExtractionRun[Extraction run manifest]
|
|
98
|
+
end
|
|
99
|
+
|
|
100
|
+
subgraph PluggableRetrievalBackend[Pluggable: retrieval backend]
|
|
78
101
|
direction LR
|
|
79
102
|
|
|
80
103
|
subgraph BackendIngestionIndexing[Ingestion and indexing]
|
|
81
104
|
direction TB
|
|
82
|
-
|
|
105
|
+
ExtractionRun --> Build[Build run]
|
|
83
106
|
Build --> BackendIndex[Backend index]
|
|
84
107
|
BackendIndex --> Run[Run manifest]
|
|
85
108
|
end
|
|
@@ -100,6 +123,7 @@ flowchart LR
|
|
|
100
123
|
end
|
|
101
124
|
|
|
102
125
|
style StableCore fill:#ffffff,stroke:#8e24aa,stroke-width:2px,color:#111111
|
|
126
|
+
style PluggableExtractionPipeline fill:#ffffff,stroke:#5e35b1,stroke-dasharray:6 3,stroke-width:2px,color:#111111
|
|
103
127
|
style PluggableRetrievalBackend fill:#ffffff,stroke:#1e88e5,stroke-dasharray:6 3,stroke-width:2px,color:#111111
|
|
104
128
|
style YourCode fill:#ffffff,stroke:#d81b60,stroke-width:2px,color:#111111
|
|
105
129
|
style BackendIngestionIndexing fill:#ffffff,stroke:#cfd8dc,color:#111111
|
|
@@ -107,6 +131,8 @@ flowchart LR
|
|
|
107
131
|
|
|
108
132
|
style Raw fill:#f3e5f5,stroke:#8e24aa,color:#111111
|
|
109
133
|
style Catalog fill:#f3e5f5,stroke:#8e24aa,color:#111111
|
|
134
|
+
style ExtractedText fill:#f3e5f5,stroke:#8e24aa,color:#111111
|
|
135
|
+
style ExtractionRun fill:#f3e5f5,stroke:#8e24aa,color:#111111
|
|
110
136
|
style BackendIndex fill:#f3e5f5,stroke:#8e24aa,color:#111111
|
|
111
137
|
style Run fill:#f3e5f5,stroke:#8e24aa,color:#111111
|
|
112
138
|
style Evidence fill:#f3e5f5,stroke:#8e24aa,color:#111111
|
|
@@ -115,6 +141,7 @@ flowchart LR
|
|
|
115
141
|
style Source fill:#f3e5f5,stroke:#8e24aa,color:#111111
|
|
116
142
|
|
|
117
143
|
style Ingest fill:#eceff1,stroke:#90a4ae,color:#111111
|
|
144
|
+
style Extract fill:#eceff1,stroke:#90a4ae,color:#111111
|
|
118
145
|
style Build fill:#eceff1,stroke:#90a4ae,color:#111111
|
|
119
146
|
style Query fill:#eceff1,stroke:#90a4ae,color:#111111
|
|
120
147
|
style Model fill:#eceff1,stroke:#90a4ae,color:#111111
|
|
@@ -136,6 +163,8 @@ flowchart LR
|
|
|
136
163
|
|
|
137
164
|
- Initialize a corpus folder.
|
|
138
165
|
- Ingest items from file paths, web addresses, or text input.
|
|
166
|
+
- Crawl a website section into corpus items when you want a repeatable “import from the web” workflow.
|
|
167
|
+
- Run extraction when you want derived text artifacts from non-text sources.
|
|
139
168
|
- Reindex to refresh the catalog after edits.
|
|
140
169
|
- Build a retrieval run with a backend.
|
|
141
170
|
- Query the run to collect evidence and evaluate it with datasets.
|
|
@@ -154,17 +183,40 @@ After the first release, you can install it from Python Package Index.
|
|
|
154
183
|
python3 -m pip install biblicus
|
|
155
184
|
```
|
|
156
185
|
|
|
186
|
+
### Optional extras
|
|
187
|
+
|
|
188
|
+
Some extractors are optional so the base install stays small.
|
|
189
|
+
|
|
190
|
+
- Optical character recognition for images: `python3 -m pip install "biblicus[ocr]"`
|
|
191
|
+
- Speech to text transcription: `python3 -m pip install "biblicus[openai]"` (requires an OpenAI API key in `~/.biblicus/config.yml` or `./.biblicus/config.yml`)
|
|
192
|
+
- Broad document parsing fallback: `python3 -m pip install "biblicus[unstructured]"`
|
|
193
|
+
|
|
157
194
|
## Quick start
|
|
158
195
|
|
|
159
196
|
```
|
|
197
|
+
mkdir -p notes
|
|
198
|
+
echo "A small file note" > notes/example.txt
|
|
199
|
+
|
|
160
200
|
biblicus init corpora/example
|
|
161
201
|
biblicus ingest --corpus corpora/example notes/example.txt
|
|
162
202
|
echo "A short note" | biblicus ingest --corpus corpora/example --stdin --title "First note"
|
|
163
203
|
biblicus list --corpus corpora/example
|
|
204
|
+
biblicus extract build --corpus corpora/example --step pass-through-text --step metadata-text
|
|
205
|
+
biblicus extract list --corpus corpora/example
|
|
164
206
|
biblicus build --corpus corpora/example --backend scan
|
|
165
207
|
biblicus query --corpus corpora/example --query "note"
|
|
166
208
|
```
|
|
167
209
|
|
|
210
|
+
If you want to turn a website section into corpus items, crawl a root web address while restricting the crawl to an allowed prefix:
|
|
211
|
+
|
|
212
|
+
```
|
|
213
|
+
biblicus crawl --corpus corpora/example \
|
|
214
|
+
--root-url https://example.com/docs/index.html \
|
|
215
|
+
--allowed-prefix https://example.com/docs/ \
|
|
216
|
+
--max-items 50 \
|
|
217
|
+
--tag crawled
|
|
218
|
+
```
|
|
219
|
+
|
|
168
220
|
## Python usage
|
|
169
221
|
|
|
170
222
|
From Python, the same flow is available through the Corpus class and backend interfaces. The public surface area is small on purpose.
|
|
@@ -188,13 +240,18 @@ In an assistant system, retrieval usually produces context for a model call. Thi
|
|
|
188
240
|
|
|
189
241
|
## Learn more
|
|
190
242
|
|
|
243
|
+
Full documentation is published on GitHub Pages: https://anthusai.github.io/Biblicus/
|
|
244
|
+
|
|
191
245
|
The documents below are written to be read in order.
|
|
192
246
|
|
|
193
247
|
- [Architecture][architecture]
|
|
248
|
+
- [Roadmap][roadmap]
|
|
249
|
+
- [Feature index][feature-index]
|
|
194
250
|
- [Corpus][corpus]
|
|
195
251
|
- [Text extraction][text-extraction]
|
|
252
|
+
- [User configuration][user-configuration]
|
|
196
253
|
- [Backends][backends]
|
|
197
|
-
- [
|
|
254
|
+
- [Demos][demos]
|
|
198
255
|
- [Testing][testing]
|
|
199
256
|
|
|
200
257
|
## Metadata and catalog
|
|
@@ -212,7 +269,16 @@ corpus/
|
|
|
212
269
|
config.json
|
|
213
270
|
catalog.json
|
|
214
271
|
runs/
|
|
215
|
-
|
|
272
|
+
extraction/
|
|
273
|
+
pipeline/
|
|
274
|
+
<run id>/
|
|
275
|
+
manifest.json
|
|
276
|
+
text/
|
|
277
|
+
<item id>.txt
|
|
278
|
+
retrieval/
|
|
279
|
+
<backend id>/
|
|
280
|
+
<run id>/
|
|
281
|
+
manifest.json
|
|
216
282
|
```
|
|
217
283
|
|
|
218
284
|
## Retrieval backends
|
|
@@ -252,10 +318,18 @@ Publishing uses a Python Package Index token stored in the GitHub secret named P
|
|
|
252
318
|
|
|
253
319
|
## Documentation
|
|
254
320
|
|
|
255
|
-
Reference documentation is generated from Sphinx style docstrings.
|
|
321
|
+
Reference documentation is generated from Sphinx-style docstrings.
|
|
322
|
+
|
|
323
|
+
Install development dependencies:
|
|
324
|
+
|
|
325
|
+
```
|
|
326
|
+
python3 -m pip install -e ".[dev]"
|
|
327
|
+
```
|
|
328
|
+
|
|
329
|
+
Build the documentation:
|
|
256
330
|
|
|
257
331
|
```
|
|
258
|
-
|
|
332
|
+
python3 -m sphinx -b html docs docs/_build/html
|
|
259
333
|
```
|
|
260
334
|
|
|
261
335
|
## License
|
|
@@ -264,11 +338,15 @@ License terms are in `LICENSE`.
|
|
|
264
338
|
|
|
265
339
|
[retrieval augmented generation overview]: https://en.wikipedia.org/wiki/Retrieval-augmented_generation
|
|
266
340
|
[architecture]: docs/ARCHITECTURE.md
|
|
341
|
+
[roadmap]: docs/ROADMAP.md
|
|
342
|
+
[feature-index]: docs/FEATURE_INDEX.md
|
|
267
343
|
[corpus]: docs/CORPUS.md
|
|
268
344
|
[text-extraction]: docs/EXTRACTION.md
|
|
345
|
+
[user-configuration]: docs/USER_CONFIGURATION.md
|
|
269
346
|
[backends]: docs/BACKENDS.md
|
|
270
|
-
[
|
|
347
|
+
[demos]: docs/DEMOS.md
|
|
271
348
|
[testing]: docs/TESTING.md
|
|
272
349
|
|
|
273
350
|
[continuous-integration-badge]: https://github.com/AnthusAI/Biblicus/actions/workflows/ci.yml/badge.svg?branch=main
|
|
274
351
|
[coverage-badge]: https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/AnthusAI/Biblicus/main/coverage_badge.json
|
|
352
|
+
[documentation-badge]: https://img.shields.io/badge/docs-GitHub%20Pages-blue
|
|
@@ -1,27 +1,8 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
2
|
-
Name: biblicus
|
|
3
|
-
Version: 0.2.0
|
|
4
|
-
Summary: Command line interface and Python library for corpus ingestion, retrieval, and evaluation.
|
|
5
|
-
License: MIT
|
|
6
|
-
Requires-Python: >=3.9
|
|
7
|
-
Description-Content-Type: text/markdown
|
|
8
|
-
License-File: LICENSE
|
|
9
|
-
Requires-Dist: pydantic>=2.0
|
|
10
|
-
Requires-Dist: PyYAML>=6.0
|
|
11
|
-
Provides-Extra: dev
|
|
12
|
-
Requires-Dist: behave>=1.2.6; extra == "dev"
|
|
13
|
-
Requires-Dist: coverage[toml]>=7.0; extra == "dev"
|
|
14
|
-
Requires-Dist: sphinx>=7.0; extra == "dev"
|
|
15
|
-
Requires-Dist: myst-parser>=2.0; extra == "dev"
|
|
16
|
-
Requires-Dist: ruff>=0.4.0; extra == "dev"
|
|
17
|
-
Requires-Dist: black>=24.0; extra == "dev"
|
|
18
|
-
Requires-Dist: python-semantic-release>=9.0.0; extra == "dev"
|
|
19
|
-
Dynamic: license-file
|
|
20
|
-
|
|
21
1
|
# Biblicus
|
|
22
2
|
|
|
23
3
|
![Continuous integration][continuous-integration-badge]
|
|
24
4
|
![Coverage][coverage-badge]
|
|
5
|
+
![Documentation][documentation-badge]
|
|
25
6
|
|
|
26
7
|
Make your documents usable by your assistant, then decide later how you will search and retrieve them.
|
|
27
8
|
|
|
@@ -31,28 +12,34 @@ The first practical problem is not retrieval. It is collection and care. You nee
|
|
|
31
12
|
|
|
32
13
|
This library gives you a corpus, which is a normal folder on disk. It stores each ingested item as a file, with optional metadata stored next to it. You can open and inspect the raw files directly. Any derived catalog or index can be rebuilt from the raw corpus.
|
|
33
14
|
|
|
34
|
-
It
|
|
15
|
+
It can be used alongside LangChain, Tactus, Pydantic AI, or the agent development kit. Use it from Python or from the command line interface.
|
|
35
16
|
|
|
36
17
|
See [retrieval augmented generation overview] for a short introduction to the idea.
|
|
37
18
|
|
|
38
|
-
##
|
|
19
|
+
## A beginner friendly mental model
|
|
20
|
+
|
|
21
|
+
Think in three stages.
|
|
22
|
+
|
|
23
|
+
- Ingest puts raw items into a corpus. This is file-first and human-inspectable.
|
|
24
|
+
- Extract turns items into usable text. This is where you would do text extraction from Portable Document Format files, optical character recognition for images, or speech to text for audio. If an item is already text, extraction can simply read it. Extraction outputs are derived artifacts, not edits to the raw files.
|
|
25
|
+
- Retrieve searches extracted text and returns evidence. Evidence is structured so you can turn it into context for your model call in whatever way your project prefers.
|
|
39
26
|
|
|
40
|
-
|
|
27
|
+
If you learn a few project words, the rest of the system becomes predictable.
|
|
41
28
|
|
|
42
29
|
- Corpus is the folder that holds raw items and their metadata.
|
|
43
|
-
- Item is the raw bytes
|
|
30
|
+
- Item is the raw bytes plus optional metadata and source information.
|
|
44
31
|
- Catalog is the rebuildable index of the corpus.
|
|
45
|
-
-
|
|
46
|
-
- Run is a recorded retrieval build for a corpus.
|
|
32
|
+
- Extraction run is a recorded extraction build that produces text artifacts.
|
|
47
33
|
- Backend is a pluggable retrieval implementation.
|
|
48
|
-
-
|
|
49
|
-
-
|
|
34
|
+
- Run is a recorded retrieval build for a corpus.
|
|
35
|
+
- Evidence is what retrieval returns, with identifiers and source information.
|
|
50
36
|
|
|
51
37
|
## Diagram
|
|
52
38
|
|
|
53
39
|
This diagram shows how a corpus becomes evidence for an assistant.
|
|
54
|
-
|
|
55
|
-
The
|
|
40
|
+
Extraction is introduced here as a separate stage so you can swap extraction approaches without changing the raw corpus.
|
|
41
|
+
The legend shows what the block styles mean.
|
|
42
|
+
Your code is where you decide how to turn evidence into context and how to call a model.
|
|
56
43
|
|
|
57
44
|
```mermaid
|
|
58
45
|
%%{init: {"flowchart": {"useMaxWidth": true, "nodeSpacing": 18, "rankSpacing": 22}}}%%
|
|
@@ -74,12 +61,19 @@ flowchart LR
|
|
|
74
61
|
Raw --> Catalog[Catalog file]
|
|
75
62
|
end
|
|
76
63
|
|
|
77
|
-
subgraph
|
|
64
|
+
subgraph PluggableExtractionPipeline[Pluggable: extraction pipeline]
|
|
65
|
+
direction TB
|
|
66
|
+
Catalog --> Extract[Extract pipeline]
|
|
67
|
+
Extract --> ExtractedText[Extracted text artifacts]
|
|
68
|
+
ExtractedText --> ExtractionRun[Extraction run manifest]
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
subgraph PluggableRetrievalBackend[Pluggable: retrieval backend]
|
|
78
72
|
direction LR
|
|
79
73
|
|
|
80
74
|
subgraph BackendIngestionIndexing[Ingestion and indexing]
|
|
81
75
|
direction TB
|
|
82
|
-
|
|
76
|
+
ExtractionRun --> Build[Build run]
|
|
83
77
|
Build --> BackendIndex[Backend index]
|
|
84
78
|
BackendIndex --> Run[Run manifest]
|
|
85
79
|
end
|
|
@@ -100,6 +94,7 @@ flowchart LR
|
|
|
100
94
|
end
|
|
101
95
|
|
|
102
96
|
style StableCore fill:#ffffff,stroke:#8e24aa,stroke-width:2px,color:#111111
|
|
97
|
+
style PluggableExtractionPipeline fill:#ffffff,stroke:#5e35b1,stroke-dasharray:6 3,stroke-width:2px,color:#111111
|
|
103
98
|
style PluggableRetrievalBackend fill:#ffffff,stroke:#1e88e5,stroke-dasharray:6 3,stroke-width:2px,color:#111111
|
|
104
99
|
style YourCode fill:#ffffff,stroke:#d81b60,stroke-width:2px,color:#111111
|
|
105
100
|
style BackendIngestionIndexing fill:#ffffff,stroke:#cfd8dc,color:#111111
|
|
@@ -107,6 +102,8 @@ flowchart LR
|
|
|
107
102
|
|
|
108
103
|
style Raw fill:#f3e5f5,stroke:#8e24aa,color:#111111
|
|
109
104
|
style Catalog fill:#f3e5f5,stroke:#8e24aa,color:#111111
|
|
105
|
+
style ExtractedText fill:#f3e5f5,stroke:#8e24aa,color:#111111
|
|
106
|
+
style ExtractionRun fill:#f3e5f5,stroke:#8e24aa,color:#111111
|
|
110
107
|
style BackendIndex fill:#f3e5f5,stroke:#8e24aa,color:#111111
|
|
111
108
|
style Run fill:#f3e5f5,stroke:#8e24aa,color:#111111
|
|
112
109
|
style Evidence fill:#f3e5f5,stroke:#8e24aa,color:#111111
|
|
@@ -115,6 +112,7 @@ flowchart LR
|
|
|
115
112
|
style Source fill:#f3e5f5,stroke:#8e24aa,color:#111111
|
|
116
113
|
|
|
117
114
|
style Ingest fill:#eceff1,stroke:#90a4ae,color:#111111
|
|
115
|
+
style Extract fill:#eceff1,stroke:#90a4ae,color:#111111
|
|
118
116
|
style Build fill:#eceff1,stroke:#90a4ae,color:#111111
|
|
119
117
|
style Query fill:#eceff1,stroke:#90a4ae,color:#111111
|
|
120
118
|
style Model fill:#eceff1,stroke:#90a4ae,color:#111111
|
|
@@ -136,6 +134,8 @@ flowchart LR
|
|
|
136
134
|
|
|
137
135
|
- Initialize a corpus folder.
|
|
138
136
|
- Ingest items from file paths, web addresses, or text input.
|
|
137
|
+
- Crawl a website section into corpus items when you want a repeatable “import from the web” workflow.
|
|
138
|
+
- Run extraction when you want derived text artifacts from non-text sources.
|
|
139
139
|
- Reindex to refresh the catalog after edits.
|
|
140
140
|
- Build a retrieval run with a backend.
|
|
141
141
|
- Query the run to collect evidence and evaluate it with datasets.
|
|
@@ -154,17 +154,40 @@ After the first release, you can install it from Python Package Index.
|
|
|
154
154
|
python3 -m pip install biblicus
|
|
155
155
|
```
|
|
156
156
|
|
|
157
|
+
### Optional extras
|
|
158
|
+
|
|
159
|
+
Some extractors are optional so the base install stays small.
|
|
160
|
+
|
|
161
|
+
- Optical character recognition for images: `python3 -m pip install "biblicus[ocr]"`
|
|
162
|
+
- Speech to text transcription: `python3 -m pip install "biblicus[openai]"` (requires an OpenAI API key in `~/.biblicus/config.yml` or `./.biblicus/config.yml`)
|
|
163
|
+
- Broad document parsing fallback: `python3 -m pip install "biblicus[unstructured]"`
|
|
164
|
+
|
|
157
165
|
## Quick start
|
|
158
166
|
|
|
159
167
|
```
|
|
168
|
+
mkdir -p notes
|
|
169
|
+
echo "A small file note" > notes/example.txt
|
|
170
|
+
|
|
160
171
|
biblicus init corpora/example
|
|
161
172
|
biblicus ingest --corpus corpora/example notes/example.txt
|
|
162
173
|
echo "A short note" | biblicus ingest --corpus corpora/example --stdin --title "First note"
|
|
163
174
|
biblicus list --corpus corpora/example
|
|
175
|
+
biblicus extract build --corpus corpora/example --step pass-through-text --step metadata-text
|
|
176
|
+
biblicus extract list --corpus corpora/example
|
|
164
177
|
biblicus build --corpus corpora/example --backend scan
|
|
165
178
|
biblicus query --corpus corpora/example --query "note"
|
|
166
179
|
```
|
|
167
180
|
|
|
181
|
+
If you want to turn a website section into corpus items, crawl a root web address while restricting the crawl to an allowed prefix:
|
|
182
|
+
|
|
183
|
+
```
|
|
184
|
+
biblicus crawl --corpus corpora/example \
|
|
185
|
+
--root-url https://example.com/docs/index.html \
|
|
186
|
+
--allowed-prefix https://example.com/docs/ \
|
|
187
|
+
--max-items 50 \
|
|
188
|
+
--tag crawled
|
|
189
|
+
```
|
|
190
|
+
|
|
168
191
|
## Python usage
|
|
169
192
|
|
|
170
193
|
From Python, the same flow is available through the Corpus class and backend interfaces. The public surface area is small on purpose.
|
|
@@ -188,13 +211,18 @@ In an assistant system, retrieval usually produces context for a model call. Thi
|
|
|
188
211
|
|
|
189
212
|
## Learn more
|
|
190
213
|
|
|
214
|
+
Full documentation is published on GitHub Pages: https://anthusai.github.io/Biblicus/
|
|
215
|
+
|
|
191
216
|
The documents below are written to be read in order.
|
|
192
217
|
|
|
193
218
|
- [Architecture][architecture]
|
|
219
|
+
- [Roadmap][roadmap]
|
|
220
|
+
- [Feature index][feature-index]
|
|
194
221
|
- [Corpus][corpus]
|
|
195
222
|
- [Text extraction][text-extraction]
|
|
223
|
+
- [User configuration][user-configuration]
|
|
196
224
|
- [Backends][backends]
|
|
197
|
-
- [
|
|
225
|
+
- [Demos][demos]
|
|
198
226
|
- [Testing][testing]
|
|
199
227
|
|
|
200
228
|
## Metadata and catalog
|
|
@@ -212,7 +240,16 @@ corpus/
|
|
|
212
240
|
config.json
|
|
213
241
|
catalog.json
|
|
214
242
|
runs/
|
|
215
|
-
|
|
243
|
+
extraction/
|
|
244
|
+
pipeline/
|
|
245
|
+
<run id>/
|
|
246
|
+
manifest.json
|
|
247
|
+
text/
|
|
248
|
+
<item id>.txt
|
|
249
|
+
retrieval/
|
|
250
|
+
<backend id>/
|
|
251
|
+
<run id>/
|
|
252
|
+
manifest.json
|
|
216
253
|
```
|
|
217
254
|
|
|
218
255
|
## Retrieval backends
|
|
@@ -252,10 +289,18 @@ Publishing uses a Python Package Index token stored in the GitHub secret named P
|
|
|
252
289
|
|
|
253
290
|
## Documentation
|
|
254
291
|
|
|
255
|
-
Reference documentation is generated from Sphinx style docstrings.
|
|
292
|
+
Reference documentation is generated from Sphinx-style docstrings.
|
|
293
|
+
|
|
294
|
+
Install development dependencies:
|
|
295
|
+
|
|
296
|
+
```
|
|
297
|
+
python3 -m pip install -e ".[dev]"
|
|
298
|
+
```
|
|
299
|
+
|
|
300
|
+
Build the documentation:
|
|
256
301
|
|
|
257
302
|
```
|
|
258
|
-
|
|
303
|
+
python3 -m sphinx -b html docs docs/_build/html
|
|
259
304
|
```
|
|
260
305
|
|
|
261
306
|
## License
|
|
@@ -264,11 +309,15 @@ License terms are in `LICENSE`.
|
|
|
264
309
|
|
|
265
310
|
[retrieval augmented generation overview]: https://en.wikipedia.org/wiki/Retrieval-augmented_generation
|
|
266
311
|
[architecture]: docs/ARCHITECTURE.md
|
|
312
|
+
[roadmap]: docs/ROADMAP.md
|
|
313
|
+
[feature-index]: docs/FEATURE_INDEX.md
|
|
267
314
|
[corpus]: docs/CORPUS.md
|
|
268
315
|
[text-extraction]: docs/EXTRACTION.md
|
|
316
|
+
[user-configuration]: docs/USER_CONFIGURATION.md
|
|
269
317
|
[backends]: docs/BACKENDS.md
|
|
270
|
-
[
|
|
318
|
+
[demos]: docs/DEMOS.md
|
|
271
319
|
[testing]: docs/TESTING.md
|
|
272
320
|
|
|
273
321
|
[continuous-integration-badge]: https://github.com/AnthusAI/Biblicus/actions/workflows/ci.yml/badge.svg?branch=main
|
|
274
322
|
[coverage-badge]: https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/AnthusAI/Biblicus/main/coverage_badge.json
|
|
323
|
+
[documentation-badge]: https://img.shields.io/badge/docs-GitHub%20Pages-blue
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# Third-party notices
|
|
2
|
+
|
|
3
|
+
This project includes vendored third-party source code.
|
|
4
|
+
|
|
5
|
+
## dotyaml
|
|
6
|
+
|
|
7
|
+
Portions of this repository vendor code from the `dotyaml` project.
|
|
8
|
+
|
|
9
|
+
- Project: `dotyaml`
|
|
10
|
+
- Source: `../dotyaml` (vendored into `src/biblicus/_vendor/dotyaml/`)
|
|
11
|
+
- License: MIT
|
|
12
|
+
|
|
13
|
+
```
|
|
14
|
+
MIT License
|
|
15
|
+
|
|
16
|
+
Copyright (c) 2025 yamlenv
|
|
17
|
+
|
|
18
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
19
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
20
|
+
in the Software without restriction, including without limitation the rights
|
|
21
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
22
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
23
|
+
furnished to do so, subject to the following conditions:
|
|
24
|
+
|
|
25
|
+
The above copyright notice and this permission notice shall be included in all
|
|
26
|
+
copies or substantial portions of the Software.
|
|
27
|
+
|
|
28
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
29
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
30
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
31
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
32
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
33
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
34
|
+
SOFTWARE.
|
|
35
|
+
```
|
|
36
|
+
|
|
@@ -43,6 +43,20 @@ Ingest a web address:
|
|
|
43
43
|
python3 -m biblicus ingest --corpus corpora/example https://example.com --tag web
|
|
44
44
|
```
|
|
45
45
|
|
|
46
|
+
## Crawl a website prefix
|
|
47
|
+
|
|
48
|
+
To build a corpus from a website section, crawl a root uniform resource locator and restrict the crawl to an allowed prefix.
|
|
49
|
+
|
|
50
|
+
```
|
|
51
|
+
python3 -m biblicus crawl --corpus corpora/example \
|
|
52
|
+
--root-url https://example.com/docs/index.html \
|
|
53
|
+
--allowed-prefix https://example.com/docs/ \
|
|
54
|
+
--max-items 50 \
|
|
55
|
+
--tag crawled
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
The crawl command only follows links within the allowed prefix, and it applies `.biblicusignore` patterns to each link's path relative to the allowed prefix.
|
|
59
|
+
|
|
46
60
|
Ingest a text note:
|
|
47
61
|
|
|
48
62
|
```
|
|
@@ -100,4 +114,3 @@ Purging deletes all items and derived artifacts under the corpus. It requires yo
|
|
|
100
114
|
```
|
|
101
115
|
python3 -m biblicus purge --corpus corpora/example --confirm example
|
|
102
116
|
```
|
|
103
|
-
|
|
@@ -1,13 +1,9 @@
|
|
|
1
|
-
# Corpus
|
|
1
|
+
# Corpus design
|
|
2
2
|
|
|
3
|
-
This document records
|
|
3
|
+
This document records design decisions and outcomes for corpus management and lifecycle hooks in version zero.
|
|
4
4
|
|
|
5
5
|
The goal is to make corpus management practical for day-to-day use, while keeping the raw corpus durable and readable as ordinary files on disk.
|
|
6
6
|
|
|
7
|
-
## Initiative constraints
|
|
8
|
-
|
|
9
|
-
The project uses strict behavior driven development. Behavior specifications in `features/*.feature` are the authoritative definition of system behavior.
|
|
10
|
-
|
|
11
7
|
## What exists today
|
|
12
8
|
|
|
13
9
|
The project already supports:
|