docex 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147) hide show
  1. docex-0.2.0/.gitignore +74 -0
  2. docex-0.2.0/CONTRIBUTING.md +46 -0
  3. docex-0.2.0/LICENSE +21 -0
  4. docex-0.2.0/MANIFEST.in +27 -0
  5. docex-0.2.0/PKG-INFO +149 -0
  6. docex-0.2.0/README.md +102 -0
  7. docex-0.2.0/docex/__init__.py +32 -0
  8. docex-0.2.0/docex/cli.py +180 -0
  9. docex-0.2.0/docex/config/__init__.py +0 -0
  10. docex-0.2.0/docex/config/config_manager.py +165 -0
  11. docex-0.2.0/docex/config/docflow_config.py +252 -0
  12. docex-0.2.0/docex/context.py +30 -0
  13. docex-0.2.0/docex/db/__init__.py +0 -0
  14. docex-0.2.0/docex/db/abstract_database.py +268 -0
  15. docex-0.2.0/docex/db/connection.py +286 -0
  16. docex-0.2.0/docex/db/database_factory.py +31 -0
  17. docex-0.2.0/docex/db/models.py +215 -0
  18. docex-0.2.0/docex/db/postgres_database.py +80 -0
  19. docex-0.2.0/docex/db/repository.py +213 -0
  20. docex-0.2.0/docex/db/sqlite_database.py +386 -0
  21. docex-0.2.0/docex/docbasket.py +577 -0
  22. docex-0.2.0/docex/docex.py +631 -0
  23. docex-0.2.0/docex/document.py +170 -0
  24. docex-0.2.0/docex/models/__init__.py +0 -0
  25. docex-0.2.0/docex/models/document_metadata.py +19 -0
  26. docex-0.2.0/docex/models/metadata_keys.py +143 -0
  27. docex-0.2.0/docex/processors/__init__.py +1 -0
  28. docex-0.2.0/docex/processors/base.py +133 -0
  29. docex-0.2.0/docex/processors/csv_to_json.py +76 -0
  30. docex-0.2.0/docex/processors/factory.py +60 -0
  31. docex-0.2.0/docex/processors/mapper.py +39 -0
  32. docex-0.2.0/docex/processors/models.py +10 -0
  33. docex-0.2.0/docex/processors/pdf_invoice.py +44 -0
  34. docex-0.2.0/docex/services/__init__.py +0 -0
  35. docex-0.2.0/docex/services/docbasket_service.py +211 -0
  36. docex-0.2.0/docex/services/document_service.py +357 -0
  37. docex-0.2.0/docex/services/metadata_service.py +83 -0
  38. docex-0.2.0/docex/services/storage_service.py +103 -0
  39. docex-0.2.0/docex/storage/__init__.py +0 -0
  40. docex-0.2.0/docex/storage/abstract_storage.py +157 -0
  41. docex-0.2.0/docex/storage/filesystem_storage.py +278 -0
  42. docex-0.2.0/docex/storage/s3_storage.py +345 -0
  43. docex-0.2.0/docex/storage/storage_factory.py +64 -0
  44. docex-0.2.0/docex/transport/__init__.py +51 -0
  45. docex-0.2.0/docex/transport/base.py +80 -0
  46. docex-0.2.0/docex/transport/config.py +117 -0
  47. docex-0.2.0/docex/transport/http.py +256 -0
  48. docex-0.2.0/docex/transport/local.py +201 -0
  49. docex-0.2.0/docex/transport/models.py +94 -0
  50. docex-0.2.0/docex/transport/repository.py +158 -0
  51. docex-0.2.0/docex/transport/route.py +403 -0
  52. docex-0.2.0/docex/transport/route_mapper.py +60 -0
  53. docex-0.2.0/docex/transport/sftp.py +297 -0
  54. docex-0.2.0/docex/transport/transport_result.py +34 -0
  55. docex-0.2.0/docex/transport/transporter_factory.py +53 -0
  56. docex-0.2.0/docex/utils/__init__.py +1 -0
  57. docex-0.2.0/docex/utils/file_utils.py +58 -0
  58. docex-0.2.0/docex.egg-info/PKG-INFO +149 -0
  59. docex-0.2.0/docex.egg-info/SOURCES.txt +145 -0
  60. docex-0.2.0/docex.egg-info/dependency_links.txt +1 -0
  61. docex-0.2.0/docex.egg-info/entry_points.txt +2 -0
  62. docex-0.2.0/docex.egg-info/requires.txt +10 -0
  63. docex-0.2.0/docex.egg-info/top_level.txt +3 -0
  64. docex-0.2.0/docflow/__init__.py +41 -0
  65. docex-0.2.0/docflow/cli.py +243 -0
  66. docex-0.2.0/docflow/config/__init__.py +0 -0
  67. docex-0.2.0/docflow/config/config_manager.py +108 -0
  68. docex-0.2.0/docflow/config/default_config.yaml +38 -0
  69. docex-0.2.0/docflow/config/docflow_config.py +280 -0
  70. docex-0.2.0/docflow/context.py +30 -0
  71. docex-0.2.0/docflow/db/__init__.py +0 -0
  72. docex-0.2.0/docflow/db/abstract_database.py +268 -0
  73. docex-0.2.0/docflow/db/connection.py +286 -0
  74. docex-0.2.0/docflow/db/database_factory.py +31 -0
  75. docex-0.2.0/docflow/db/models.py +215 -0
  76. docex-0.2.0/docflow/db/postgres_database.py +80 -0
  77. docex-0.2.0/docflow/db/repository.py +213 -0
  78. docex-0.2.0/docflow/db/sqlite_database.py +386 -0
  79. docex-0.2.0/docflow/docbasket.py +577 -0
  80. docex-0.2.0/docflow/docflow.py +631 -0
  81. docex-0.2.0/docflow/document.py +170 -0
  82. docex-0.2.0/docflow/models/__init__.py +0 -0
  83. docex-0.2.0/docflow/models/document_metadata.py +18 -0
  84. docex-0.2.0/docflow/models/metadata_keys.py +143 -0
  85. docex-0.2.0/docflow/processors/__init__.py +1 -0
  86. docex-0.2.0/docflow/processors/base.py +133 -0
  87. docex-0.2.0/docflow/processors/csv_to_json.py +76 -0
  88. docex-0.2.0/docflow/processors/factory.py +60 -0
  89. docex-0.2.0/docflow/processors/mapper.py +37 -0
  90. docex-0.2.0/docflow/processors/models.py +10 -0
  91. docex-0.2.0/docflow/processors/pdf_invoice.py +44 -0
  92. docex-0.2.0/docflow/processors/pdf_to_text.py +40 -0
  93. docex-0.2.0/docflow/services/__init__.py +0 -0
  94. docex-0.2.0/docflow/services/docbasket_service.py +211 -0
  95. docex-0.2.0/docflow/services/document_service.py +357 -0
  96. docex-0.2.0/docflow/services/metadata_service.py +83 -0
  97. docex-0.2.0/docflow/services/storage_service.py +103 -0
  98. docex-0.2.0/docflow/storage/__init__.py +0 -0
  99. docex-0.2.0/docflow/storage/abstract_storage.py +157 -0
  100. docex-0.2.0/docflow/storage/filesystem_storage.py +278 -0
  101. docex-0.2.0/docflow/storage/s3_storage.py +345 -0
  102. docex-0.2.0/docflow/storage/storage_factory.py +64 -0
  103. docex-0.2.0/docflow/transport/__init__.py +51 -0
  104. docex-0.2.0/docflow/transport/base.py +80 -0
  105. docex-0.2.0/docflow/transport/config.py +117 -0
  106. docex-0.2.0/docflow/transport/http.py +256 -0
  107. docex-0.2.0/docflow/transport/local.py +201 -0
  108. docex-0.2.0/docflow/transport/models.py +94 -0
  109. docex-0.2.0/docflow/transport/repository.py +158 -0
  110. docex-0.2.0/docflow/transport/route.py +403 -0
  111. docex-0.2.0/docflow/transport/route_mapper.py +60 -0
  112. docex-0.2.0/docflow/transport/sftp.py +297 -0
  113. docex-0.2.0/docflow/transport/transport_result.py +34 -0
  114. docex-0.2.0/docflow/transport/transporter_factory.py +53 -0
  115. docex-0.2.0/docflow/utils/__init__.py +1 -0
  116. docex-0.2.0/docflow/utils/file_utils.py +58 -0
  117. docex-0.2.0/docs/Developer_Guide.md +374 -0
  118. docex-0.2.0/docs/DocFlow_Design.md +988 -0
  119. docex-0.2.0/examples/basic_usage.py +93 -0
  120. docex-0.2.0/examples/custom_processors/README.md +61 -0
  121. docex-0.2.0/examples/custom_processors/my_pdf_text_processor.py +59 -0
  122. docex-0.2.0/examples/custom_processors/run_custom_pdf_processor.py +71 -0
  123. docex-0.2.0/examples/hello_world.py +43 -0
  124. docex-0.2.0/examples/pdf_invoice_to_purchase_order.py +156 -0
  125. docex-0.2.0/examples/processor_csv_to_json.py +88 -0
  126. docex-0.2.0/examples/route_file_transfer.py +147 -0
  127. docex-0.2.0/examples/route_management.py +153 -0
  128. docex-0.2.0/examples/sample_data/invoice_2001321.pdf +0 -0
  129. docex-0.2.0/pyproject.toml +29 -0
  130. docex-0.2.0/setup.cfg +4 -0
  131. docex-0.2.0/setup.py +36 -0
  132. docex-0.2.0/tests/__init__.py +1 -0
  133. docex-0.2.0/tests/test_db_connection.py +43 -0
  134. docex-0.2.0/tests/test_docbasket.py +347 -0
  135. docex-0.2.0/tests/test_docflow.py +283 -0
  136. docex-0.2.0/tests/test_docflow_postgres.py +289 -0
  137. docex-0.2.0/tests/test_docflow_usage.py +98 -0
  138. docex-0.2.0/tests/test_route.py +127 -0
  139. docex-0.2.0/tests/test_route_management.py +172 -0
  140. docex-0.2.0/tests/test_simple.py +182 -0
  141. docex-0.2.0/tests/test_sqlite.py +358 -0
  142. docex-0.2.0/tests/test_transport_base.py +243 -0
  143. docex-0.2.0/tests/test_transport_integration.py +249 -0
  144. docex-0.2.0/tests/test_transport_postgres.py +268 -0
  145. docex-0.2.0/tests/test_transport_sqlite.py +38 -0
  146. docex-0.2.0/tests/test_transport_workflow.py +111 -0
  147. docex-0.2.0/tests/verify_routes.py +20 -0
docex-0.2.0/.gitignore ADDED
@@ -0,0 +1,74 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ env/
12
+ venv/
13
+ build/
14
+ develop-eggs/
15
+ dist/
16
+ downloads/
17
+ eggs/
18
+ .eggs/
19
+ lib/
20
+ lib64/
21
+ parts/
22
+ sdist/
23
+ var/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+
28
+ # PyInstaller
29
+ # Usually these files are written by a python script from a template
30
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
31
+ *.manifest
32
+ *.spec
33
+
34
+ # Installer logs
35
+ pip-log.txt
36
+ pip-delete-this-directory.txt
37
+
38
+ # Unit test / coverage reports
39
+ htmlcov/
40
+ .tox/
41
+ .nox/
42
+ .coverage
43
+ .coverage.*
44
+ .cache
45
+ nosetests.xml
46
+ coverage.xml
47
+ *.cover
48
+ .hypothesis/
49
+ .pytest_cache/
50
+
51
+ # Jupyter Notebook
52
+ .ipynb_checkpoints
53
+
54
+ # pyenv
55
+ .python-version
56
+
57
+ # mypy
58
+ .mypy_cache/
59
+ .dmypy.json
60
+
61
+ # Pyre type checker
62
+ .pyre/
63
+
64
+ # VS Code
65
+ .vscode/
66
+
67
+ # macOS
68
+ .DS_Store
69
+
70
+ # Local files
71
+ *.db
72
+ *.sqlite3
73
+ temp_download/
74
+ example_docs/
@@ -0,0 +1,46 @@
1
+ # Contributing to DocFlow
2
+
3
+ Thank you for your interest in contributing to DocFlow! We welcome contributions from the community to make this project better.
4
+
5
+ ## How to Contribute
6
+
7
+ 1. **Fork the repository** and create your branch from `main`.
8
+ 2. **Clone your fork** and set up a virtual environment.
9
+ 3. **Install dependencies** using `pip install -r requirements.txt`.
10
+ 4. **Create a new branch** for your feature or bugfix:
11
+ ```sh
12
+ git checkout -b my-feature
13
+ ```
14
+ 5. **Make your changes** and add tests as appropriate.
15
+ 6. **Run the test suite** to ensure nothing is broken:
16
+ ```sh
17
+ pytest
18
+ ```
19
+ 7. **Commit your changes** and push your branch to GitHub.
20
+ 8. **Open a Pull Request** with a clear description of your changes.
21
+
22
+ ## Code Style
23
+
24
+ - Follow [PEP8](https://www.python.org/dev/peps/pep-0008/) for Python code style.
25
+ - Use type hints where possible.
26
+ - Write clear, concise docstrings for all public classes and methods.
27
+ - Please review the design document to understand the overall architecture, in particular the separation of concerns between the implementation layers.
28
+
29
+ ## Pull Requests
30
+
31
+ - Ensure your branch is up to date with `main` before submitting a PR.
32
+ - Reference related issues in your PR description.
33
+ - Include tests for new features and bugfixes.
34
+ - Be responsive to code review feedback.
35
+
36
+ ## Reporting Issues
37
+
38
+ - Use the [GitHub Issues](https://github.com/tommyGPT2S/DocFlow/issues) page to report bugs or request features.
39
+ - Please provide as much detail as possible, including steps to reproduce, logs, and your environment.
40
+
41
+ ## Community
42
+
43
+ - Be respectful and inclusive in all interactions.
44
+ - Help others by reviewing PRs and answering questions.
45
+
46
+ Thank you for helping make DocFlow better!
docex-0.2.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Tommy Jiang
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,27 @@
1
+ # Include documentation
2
+ include README.md
3
+ include LICENSE
4
+ include docs/DocFlow_Design.md
5
+ include docs/Developer_Guide.md
6
+ include docs/New Era of Supply Chain/image.png
7
+
8
+ # Include configuration files
9
+ include docflow/config/default_config.yaml
10
+
11
+ # Include database schema
12
+ include docflow/db/sqlite_schema.sql
13
+
14
+ # Include examples
15
+ recursive-include examples *
16
+
17
+ # Include test files
18
+ include test_simple.py
19
+ include test_db_connection.py
20
+ include test_sqlite.py
21
+ include test_transport_workflow.py
22
+ include verify_routes.py
23
+
24
+ # Include any other important files
25
+ include .env.example
26
+ include .gitignore
27
+ include CONTRIBUTING.md
docex-0.2.0/PKG-INFO ADDED
@@ -0,0 +1,149 @@
1
+ Metadata-Version: 2.4
2
+ Name: docex
3
+ Version: 0.2.0
4
+ Summary: A robust, lightweight, and developer-friendly document management and transport system for Python.
5
+ Home-page: https://github.com/tommyGPT2S/DocFlow
6
+ Author: Tommy Jiang
7
+ Author-email: Tommy Jiang <tommyscosc@scos.ai>
8
+ License: MIT License
9
+
10
+ Copyright (c) 2025 Tommy Jiang
11
+
12
+ Permission is hereby granted, free of charge, to any person obtaining a copy
13
+ of this software and associated documentation files (the "Software"), to deal
14
+ in the Software without restriction, including without limitation the rights
15
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
16
+ copies of the Software, and to permit persons to whom the Software is
17
+ furnished to do so, subject to the following conditions:
18
+
19
+ The above copyright notice and this permission notice shall be included in all
20
+ copies or substantial portions of the Software.
21
+
22
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
25
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
27
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
28
+ SOFTWARE.
29
+
30
+ Requires-Python: >=3.8
31
+ Description-Content-Type: text/markdown
32
+ License-File: LICENSE
33
+ Requires-Dist: sqlalchemy>=2.0.0
34
+ Requires-Dist: psycopg2-binary>=2.9.0
35
+ Requires-Dist: pyyaml>=6.0.0
36
+ Requires-Dist: pytest>=7.0.0
37
+ Requires-Dist: boto3>=1.26.0
38
+ Requires-Dist: pydantic>=2.0.0
39
+ Requires-Dist: paramiko>=3.4.0
40
+ Requires-Dist: aiohttp>=3.9.0
41
+ Requires-Dist: python-dotenv>=1.0.0
42
+ Requires-Dist: click>=8.0.0
43
+ Dynamic: author
44
+ Dynamic: home-page
45
+ Dynamic: license-file
46
+ Dynamic: requires-python
47
+
48
+ # DocFlow
49
+
50
+ <!-- Badges -->
51
+ ![License](https://img.shields.io/github/license/tommyGPT2S/DocFlow)
52
+ ![Python](https://img.shields.io/pypi/pyversions/docflow)
53
+ ![Build](https://github.com/tommyGPT2S/DocFlow/actions/workflows/ci.yml/badge.svg)
54
+ <!-- Add PyPI badge here when ready -->
55
+
56
+ ![DocFlow Architecture](docs/New%20Era%20of%20Supply%20Chain/image.png)
57
+
58
+ **DocFlow** is a robust, extensible document management and transport system for Python. It supports multiple storage backends, metadata management, and operation tracking, with a unified API for local, SFTP, HTTP, and other protocols.
59
+
60
+ ## Features
61
+
62
+ - 📁 Document storage and metadata management
63
+ - 🔄 Transport layer with pluggable protocols (local, SFTP, HTTP, etc.)
64
+ - 🛣️ Configurable transport routes and routing rules
65
+ - 📝 Operation and audit tracking
66
+ - 🧩 Extensible architecture for new protocols and workflows
67
+
68
+ ## Installation
69
+
70
+ Install from PyPI:
71
+
72
+ ```sh
73
+ pip install docex
74
+ ```
75
+
76
+ If you want to use PDF processing features (e.g., custom PDF processors), also install:
77
+
78
+ ```sh
79
+ pip install pdfminer.six
80
+ ```
81
+
82
+ ## Quick Start
83
+
84
+ Before using DocFlow in your code, you must initialize the system using the CLI:
85
+
86
+ ```sh
87
+ # Run this once to set up configuration and database
88
+ $ docflow init
89
+ ```
90
+
91
+ Then you can use the Python API (minimal example):
92
+
93
+ ```python
94
+ from docflow import DocFlow
95
+ from pathlib import Path
96
+
97
+ # Create DocFlow instance (will check initialization internally)
98
+ docflow = DocFlow()
99
+
100
+ # Create a basket
101
+ basket = docflow.basket('mybasket')
102
+
103
+ # Create a simple text file
104
+ hello_file = Path('hello.txt')
105
+ hello_file.write_text('Hello scos.ai!')
106
+
107
+ # Add the document to the basket
108
+ doc = basket.add(str(hello_file))
109
+
110
+ # Print document details
111
+ print(doc.get_details())
112
+
113
+ hello_file.unlink()
114
+ ```
115
+ Additional examples can be found in examples/ folder.
116
+
117
+ ## Configuration
118
+
119
+ Configure routes and storage in `default_config.yaml`:
120
+
121
+ ```yaml
122
+ transport_config:
123
+ routes:
124
+ - name: local_backup
125
+ purpose: backup
126
+ protocol: local
127
+ config:
128
+ type: local
129
+ name: local_backup_transport
130
+ base_path: /path/to/backup
131
+ create_dirs: true
132
+ can_upload: true
133
+ can_download: true
134
+ enabled: true
135
+ default_route: local_backup
136
+ ```
137
+
138
+ ## Documentation
139
+
140
+ - [Developer Guide](docs/Developer_Guide.md)
141
+ - [Design Document](docs/DocFlow_Design.md)
142
+
143
+ ## Contributing
144
+
145
+ Contributions are welcome! Please see [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines.
146
+
147
+ ## License
148
+
149
+ [MIT License](LICENSE)
docex-0.2.0/README.md ADDED
@@ -0,0 +1,102 @@
1
+ # DocFlow
2
+
3
+ <!-- Badges -->
4
+ ![License](https://img.shields.io/github/license/tommyGPT2S/DocFlow)
5
+ ![Python](https://img.shields.io/pypi/pyversions/docflow)
6
+ ![Build](https://github.com/tommyGPT2S/DocFlow/actions/workflows/ci.yml/badge.svg)
7
+ <!-- Add PyPI badge here when ready -->
8
+
9
+ ![DocFlow Architecture](docs/New%20Era%20of%20Supply%20Chain/image.png)
10
+
11
+ **DocFlow** is a robust, extensible document management and transport system for Python. It supports multiple storage backends, metadata management, and operation tracking, with a unified API for local, SFTP, HTTP, and other protocols.
12
+
13
+ ## Features
14
+
15
+ - 📁 Document storage and metadata management
16
+ - 🔄 Transport layer with pluggable protocols (local, SFTP, HTTP, etc.)
17
+ - 🛣️ Configurable transport routes and routing rules
18
+ - 📝 Operation and audit tracking
19
+ - 🧩 Extensible architecture for new protocols and workflows
20
+
21
+ ## Installation
22
+
23
+ Install from PyPI:
24
+
25
+ ```sh
26
+ pip install docex
27
+ ```
28
+
29
+ If you want to use PDF processing features (e.g., custom PDF processors), also install:
30
+
31
+ ```sh
32
+ pip install pdfminer.six
33
+ ```
34
+
35
+ ## Quick Start
36
+
37
+ Before using DocFlow in your code, you must initialize the system using the CLI:
38
+
39
+ ```sh
40
+ # Run this once to set up configuration and database
41
+ $ docflow init
42
+ ```
43
+
44
+ Then you can use the Python API (minimal example):
45
+
46
+ ```python
47
+ from docflow import DocFlow
48
+ from pathlib import Path
49
+
50
+ # Create DocFlow instance (will check initialization internally)
51
+ docflow = DocFlow()
52
+
53
+ # Create a basket
54
+ basket = docflow.basket('mybasket')
55
+
56
+ # Create a simple text file
57
+ hello_file = Path('hello.txt')
58
+ hello_file.write_text('Hello scos.ai!')
59
+
60
+ # Add the document to the basket
61
+ doc = basket.add(str(hello_file))
62
+
63
+ # Print document details
64
+ print(doc.get_details())
65
+
66
+ hello_file.unlink()
67
+ ```
68
+ Additional examples can be found in examples/ folder.
69
+
70
+ ## Configuration
71
+
72
+ Configure routes and storage in `default_config.yaml`:
73
+
74
+ ```yaml
75
+ transport_config:
76
+ routes:
77
+ - name: local_backup
78
+ purpose: backup
79
+ protocol: local
80
+ config:
81
+ type: local
82
+ name: local_backup_transport
83
+ base_path: /path/to/backup
84
+ create_dirs: true
85
+ can_upload: true
86
+ can_download: true
87
+ enabled: true
88
+ default_route: local_backup
89
+ ```
90
+
91
+ ## Documentation
92
+
93
+ - [Developer Guide](docs/Developer_Guide.md)
94
+ - [Design Document](docs/DocFlow_Design.md)
95
+
96
+ ## Contributing
97
+
98
+ Contributions are welcome! Please see [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines.
99
+
100
+ ## License
101
+
102
+ [MIT License](LICENSE)
@@ -0,0 +1,32 @@
"""
DocEX - Document Management Library

This library provides a robust, extensible document management and transport system.
"""

from docex.docex import DocEX

# NOTE(review): everything from here down to the ``print`` call executes each
# time the package is imported: it calls ``DocEX.setup`` with a SQLite path of
# 'docex.db', creates a basket named 'my_basket' and prints the metadata keys.
# It reads like usage-example code -- confirm whether it belongs in the
# package initializer.  It is left in place because removing it would change
# what importing the package does.
# Also note that binding the name ``docex`` below replaces the package
# attribute that the import above pointed at the ``docex.docex`` submodule.

# Setup DocEX with configuration
DocEX.setup(
    database={
        'type': 'sqlite',
        'sqlite': {'path': 'docex.db'}
    }
)

# Create DocEX instance
docex = DocEX()
basket = docex.create_basket('my_basket')

# Get available metadata keys
print(DocEX.get_metadata_keys())

# ``__all__`` may only name objects that are actually bound in this module;
# otherwise ``from docex import *`` raises AttributeError.  DocBasket,
# Document, Route and UserContext are not imported here, so they are not
# listed.  TODO: import them in this module and add them back to the list.
__all__ = [
    'DocEX',  # Main entry point
]

# Kept in step with the version declared in the package metadata.
__version__ = '0.2.0'
@@ -0,0 +1,180 @@
1
+ """
2
+ DocEX CLI commands
3
+
4
+ This module provides command-line interface for DocEX operations.
5
+ """
6
+
import logging
import os
import sqlite3
import sys
from datetime import datetime, timezone
from pathlib import Path
from typing import Optional

try:
    # ``datetime.UTC`` only exists on Python 3.11+, while the package
    # metadata declares support for Python >=3.8.
    from datetime import UTC
except ImportError:
    UTC = timezone.utc

import click
import yaml
from sqlalchemy import inspect

from docex import DocEX
from docex.config.docflow_config import DocFlowConfig
from docex.db.connection import Database
from docex.db.models import Base
from docex.transport.models import Base as TransportBase
22
+
23
+ # Configure logging
24
+ logging.basicConfig(level=logging.INFO)
25
+ logger = logging.getLogger(__name__)
26
+
# Root click group for the command-line interface.  click shows the
# docstring as the ``--help`` text, so it is deliberately left short.
@click.group()
def cli():
    """DocEX command-line interface"""
31
+
# ``init`` builds a configuration (from a file or from the library defaults),
# applies command-line overrides, runs ``DocEX.setup`` and writes the result
# to ``~/.docex/config.yaml``.  Any exception is reported on stderr and the
# process exits with status 1.  The docstring is click's ``--help`` text.
@cli.command()
@click.option('--config', type=click.Path(exists=True), help='Path to configuration file')
@click.option('--force', is_flag=True, help='Force reinitialization')
@click.option('--db-type', type=click.Choice(['sqlite', 'postgresql']), help='Database type')
@click.option('--db-path', type=click.Path(), help='Path to SQLite database file')
@click.option('--db-host', help='Database host')
@click.option('--db-port', type=int, help='Database port')
@click.option('--db-name', help='Database name')
@click.option('--db-user', help='Database user')
@click.option('--db-password', help='Database password')
@click.option('--storage-path', type=click.Path(), help='Path to storage directory')
@click.option('--log-level', type=click.Choice(['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']), help='Logging level')
def init(config: Optional[str], force: bool, **kwargs):
    """Initialize DocEX with configuration"""
    # CLI option name -> key path inside the configuration mapping.  The
    # entries are applied in the order listed.
    override_targets = (
        ('db_type', ('database', 'type')),
        ('db_path', ('database', 'sqlite', 'path')),
        ('db_host', ('database', 'postgres', 'host')),
        ('db_port', ('database', 'postgres', 'port')),
        ('db_name', ('database', 'postgres', 'database')),
        ('db_user', ('database', 'postgres', 'user')),
        ('db_password', ('database', 'postgres', 'password')),
        ('storage_path', ('storage', 'filesystem', 'path')),
        ('log_level', ('logging', 'level')),
    )

    try:
        # Ask before re-initializing unless --force was given.
        if DocEX.is_initialized() and not force:
            proceed = click.confirm('DocEX is already initialized. Do you want to reinitialize? This will drop all existing data.')
            if not proceed:
                return

        # Base configuration: the user's file when given, library defaults otherwise.
        if config:
            source_path = Path(config)
            if not source_path.exists():
                click.echo(f'Configuration file not found: {config}', err=True)
                return
            user_config = DocFlowConfig.from_file(config)
        else:
            user_config = DocEX.get_defaults()

        # Apply every option that was supplied with a truthy value.
        for option_name, key_path in override_targets:
            if not kwargs.get(option_name):
                continue
            *parents, leaf = key_path
            node = user_config
            for key in parents:
                node = node[key]
            node[leaf] = kwargs[option_name]

        DocEX.setup(**user_config)

        # Persist the effective configuration under the user's home directory.
        saved_config = Path.home() / '.docex' / 'config.yaml'
        saved_config.parent.mkdir(parents=True, exist_ok=True)
        with open(saved_config, 'w') as f:
            yaml.dump(user_config, f)

        # Instantiated only as a post-setup check; the object itself is not used.
        DocEX()

        click.echo('\nDocEX initialized successfully!')

    except Exception as e:
        click.echo(f'Error initializing DocEX: {str(e)}', err=True)
        sys.exit(1)
111
+
# Sub-group that holds the processor management commands registered with
# ``@processor.command(...)``.  The docstring is the group's ``--help`` text.
@cli.group()
def processor():
    """Manage document processors"""
116
+
# Inserts one ``Processor`` row.  Prints a message and returns without
# writing when a row with the same name exists or when ``--config`` is not
# valid JSON.  The parameter names (including ``type``, which shadows the
# builtin) are fixed by the click option names.  The docstring is click's
# ``--help`` text.
@processor.command('register')
@click.option('--name', required=True, help='Processor class name (must match Python class)')
@click.option('--type', required=True, help='Processor type (e.g., format_converter, content_processor)')
@click.option('--description', default='', help='Description of the processor')
@click.option('--config', default='{}', help='JSON string for processor config')
@click.option('--enabled/--disabled', default=True, help='Enable or disable the processor')
def register_processor(name, type, description, config, enabled):
    """Register a new processor in the database"""
    from docex.db.models import Processor
    import json

    db = Database()
    with db.session() as session:
        if session.query(Processor).filter_by(name=name).first():
            click.echo(f"Processor '{name}' already exists.")
            return

        # Only a JSON parse failure is an "invalid config" condition; any
        # other exception should surface rather than be reported as bad JSON.
        try:
            config_dict = json.loads(config)
        except json.JSONDecodeError as e:
            click.echo(f"Invalid config JSON: {e}")
            return

        # One timestamp for both columns so a new row has
        # created_at == updated_at.  ``timezone.utc`` is used because the
        # ``datetime.UTC`` alias only exists on Python 3.11+.
        now = datetime.now(timezone.utc)
        processor = Processor(
            name=name,
            type=type,
            description=description,
            config=config_dict,
            enabled=enabled,
            created_at=now,
            updated_at=now,
        )
        session.add(processor)
        session.commit()
        click.echo(f"Processor '{name}' registered successfully.")
149
+
# Deletes the ``Processor`` row whose name matches ``--name`` and commits;
# prints a "not found" message when no such row exists.  The docstring is
# click's ``--help`` text.
@processor.command('remove')
@click.option('--name', required=True, help='Processor class name to remove')
def remove_processor(name):
    """Remove a processor from the database"""
    from docex.db.models import Processor

    database = Database()
    with database.session() as session:
        match = session.query(Processor).filter_by(name=name).first()
        if match:
            session.delete(match)
            session.commit()
            click.echo(f"Processor '{name}' removed successfully.")
        else:
            click.echo(f"Processor '{name}' not found.")
164
+
# Prints one summary line per ``Processor`` row, or a placeholder message
# when the query returns nothing.  The docstring is click's ``--help`` text.
@processor.command('list')
def list_processors():
    """List all registered processors"""
    from docex.db.models import Processor

    database = Database()
    with database.session() as session:
        registered = session.query(Processor).all()
        if registered:
            click.echo("Registered Processors:")
            for entry in registered:
                summary = f"- {entry.name} | Type: {entry.type} | Enabled: {entry.enabled} | Description: {entry.description}"
                click.echo(summary)
        else:
            click.echo("No processors registered.")
178
+
179
+ if __name__ == '__main__':
180
+ cli()
File without changes