document-analyser 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. document_analyser-0.1.0/.env.example +85 -0
  2. document_analyser-0.1.0/.gitignore +193 -0
  3. document_analyser-0.1.0/AI-CONVERSATION.md +2185 -0
  4. document_analyser-0.1.0/CHARTLENS_STARTER.md +548 -0
  5. document_analyser-0.1.0/CLEANUP_SUMMARY.md +187 -0
  6. document_analyser-0.1.0/DEPLOYMENT.md +340 -0
  7. document_analyser-0.1.0/DOCUMENTLENS_SETUP.md +243 -0
  8. document_analyser-0.1.0/Dockerfile +98 -0
  9. document_analyser-0.1.0/IMPLEMENTATION_PLAN.md +215 -0
  10. document_analyser-0.1.0/LEGAL.md +177 -0
  11. document_analyser-0.1.0/LICENSE +21 -0
  12. document_analyser-0.1.0/PKG-INFO +178 -0
  13. document_analyser-0.1.0/README.md +149 -0
  14. document_analyser-0.1.0/REPOSITORY_SPLIT.sh +378 -0
  15. document_analyser-0.1.0/STARTUP_GUIDE.md +158 -0
  16. document_analyser-0.1.0/app/__init__.py +1 -0
  17. document_analyser-0.1.0/app/analyzers/__init__.py +1 -0
  18. document_analyser-0.1.0/app/analyzers/domain_mapper.py +173 -0
  19. document_analyser-0.1.0/app/analyzers/integrity_checker.py +386 -0
  20. document_analyser-0.1.0/app/analyzers/keyword_analyzer.py +159 -0
  21. document_analyser-0.1.0/app/analyzers/ner_analyzer.py +40 -0
  22. document_analyser-0.1.0/app/analyzers/ngram_analyzer.py +64 -0
  23. document_analyser-0.1.0/app/analyzers/readability.py +150 -0
  24. document_analyser-0.1.0/app/analyzers/sentiment_analyzer.py +279 -0
  25. document_analyser-0.1.0/app/analyzers/structural_mismatch.py +276 -0
  26. document_analyser-0.1.0/app/analyzers/word_analysis.py +289 -0
  27. document_analyser-0.1.0/app/analyzers/writing_quality.py +334 -0
  28. document_analyser-0.1.0/app/api/__init__.py +1 -0
  29. document_analyser-0.1.0/app/api/routes/__init__.py +1 -0
  30. document_analyser-0.1.0/app/api/routes/academic_analysis.py +373 -0
  31. document_analyser-0.1.0/app/api/routes/advanced_text.py +147 -0
  32. document_analyser-0.1.0/app/api/routes/analysis.py +205 -0
  33. document_analyser-0.1.0/app/api/routes/future_endpoints.py +913 -0
  34. document_analyser-0.1.0/app/api/routes/health.py +25 -0
  35. document_analyser-0.1.0/app/api/routes/semantic_analysis.py +148 -0
  36. document_analyser-0.1.0/app/api/routes/text_analysis.py +126 -0
  37. document_analyser-0.1.0/app/core/__init__.py +1 -0
  38. document_analyser-0.1.0/app/core/config.py +73 -0
  39. document_analyser-0.1.0/app/data/ai_patterns.json +146 -0
  40. document_analyser-0.1.0/app/main.py +115 -0
  41. document_analyser-0.1.0/app/models/__init__.py +1 -0
  42. document_analyser-0.1.0/app/models/schemas.py +318 -0
  43. document_analyser-0.1.0/app/services/__init__.py +1 -0
  44. document_analyser-0.1.0/app/services/document_processor.py +472 -0
  45. document_analyser-0.1.0/app/services/doi_resolver.py +327 -0
  46. document_analyser-0.1.0/app/services/reference_extractor.py +117 -0
  47. document_analyser-0.1.0/app/services/url_verifier.py +370 -0
  48. document_analyser-0.1.0/app/utils/__init__.py +1 -0
  49. document_analyser-0.1.0/deploy.sh +135 -0
  50. document_analyser-0.1.0/docker-compose.yml +75 -0
  51. document_analyser-0.1.0/docs/README.md +57 -0
  52. document_analyser-0.1.0/docs/architecture/INTEGRATION.md +476 -0
  53. document_analyser-0.1.0/docs/architecture/OVERVIEW.md +303 -0
  54. document_analyser-0.1.0/docs/integration-examples/python-sdk-integration.md +437 -0
  55. document_analyser-0.1.0/pyproject.toml +44 -0
  56. document_analyser-0.1.0/start.sh +50 -0
  57. document_analyser-0.1.0/test-data/.gitkeep +0 -0
  58. document_analyser-0.1.0/tests/__init__.py +1 -0
  59. document_analyser-0.1.0/tests/conftest.py +202 -0
  60. document_analyser-0.1.0/tests/test_academic_analysis.py +236 -0
  61. document_analyser-0.1.0/tests/test_files.py +399 -0
  62. document_analyser-0.1.0/tests/test_health.py +93 -0
  63. document_analyser-0.1.0/tests/test_semantic_analysis.py +165 -0
  64. document_analyser-0.1.0/tests/test_text_analysis.py +210 -0
  65. document_analyser-0.1.0/uv.lock +3412 -0
@@ -0,0 +1,85 @@
1
+ # DocumentLens Configuration Template
2
+ # Copy this to .env and update values for your deployment
3
+
4
+ # ===========================================
5
+ # 🇦🇺 AUSTRALIAN DOCUMENTLENS CONFIGURATION
6
+ # ===========================================
7
+
8
+ # Application Settings
9
+ DEBUG=false
10
+ HOST=0.0.0.0
11
+ PORT=8002
12
+ WORKERS=4
13
+
14
+ # Security (CRITICAL: Change in production!)
15
+ SECRET_KEY=change-this-super-secret-key-in-production-immediately
16
+
17
+ # File Processing Limits
18
+ MAX_FILE_SIZE=52428800 # 50 MiB (50 * 1024 * 1024 bytes)
19
+ MAX_FILES_PER_REQUEST=10 # Maximum files per upload
20
+
21
+ # Rate Limiting (Australian-friendly)
22
+ RATE_LIMIT=30/minute # Requests per minute per IP
23
+
24
+ # CORS Settings
25
+ # For development only (allows requests from any origin)
26
+ ALLOWED_ORIGINS=*
27
+
28
+ # For production (specify your domains)
29
+ # ALLOWED_ORIGINS=https://document-lens.serveur.au,https://www.document-lens.serveur.au
30
+
31
+ # ===========================================
32
+ # 🔧 OPTIONAL ADVANCED SETTINGS
33
+ # ===========================================
34
+
35
+ # Logging Level
36
+ LOG_LEVEL=info # one of: debug, info, warning, error
37
+
38
+ # Database (if needed in future)
39
+ # DATABASE_URL=postgresql://user:password@localhost/documentlens
40
+
41
+ # External Service URLs (if customized)
42
+ # DOI_RESOLVER_BASE_URL=https://api.crossref.org
43
+ # URL_VERIFY_TIMEOUT=30
44
+
45
+ # Cache Settings (if Redis added in future)
46
+ # REDIS_URL=redis://localhost:6379/0
47
+ # CACHE_TTL=3600
48
+
49
+ # ===========================================
50
+ # 📊 MONITORING & PERFORMANCE
51
+ # ===========================================
52
+
53
+ # Enable metrics endpoint
54
+ ENABLE_METRICS=true
55
+
56
+ # Request timeout (seconds)
57
+ REQUEST_TIMEOUT=60
58
+
59
+ # Maximum concurrent requests
60
+ MAX_CONCURRENT_REQUESTS=100
61
+
62
+ # ===========================================
63
+ # 🌐 DEPLOYMENT-SPECIFIC SETTINGS
64
+ # ===========================================
65
+
66
+ # For Docker deployments
67
+ PYTHONPATH=/app
68
+
69
+ # For systemd service deployments
70
+ # HOME=/opt/documentlens
71
+ # USER=www-data
72
+
73
+ # ===========================================
74
+ # 💡 NOTES
75
+ # ===========================================
76
+ #
77
+ # 1. This is a template - copy to .env and customize
78
+ # 2. Never commit .env to version control
79
+ # 3. SECRET_KEY should be a long, random string (e.g. generate one with: python -c "import secrets; print(secrets.token_urlsafe(64))")
80
+ # 4. Adjust WORKERS based on your VPS resources:
81
+ # - 1GB RAM: WORKERS=2
82
+ # - 2GB RAM: WORKERS=4
83
+ # - 4GB+ RAM: WORKERS=8
84
+ # 5. For production, always use HTTPS and specific CORS origins
85
+ #
@@ -0,0 +1,193 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ .Python
7
+ build/
8
+ develop-eggs/
9
+ dist/
10
+ downloads/
11
+ eggs/
12
+ .eggs/
13
+ lib/
14
+ lib64/
15
+ parts/
16
+ sdist/
17
+ var/
18
+ wheels/
19
+ share/python-wheels/
20
+ *.egg-info/
21
+ .installed.cfg
22
+ *.egg
23
+ MANIFEST
24
+ *.manifest
25
+ *.spec
26
+ pip-log.txt
27
+ pip-delete-this-directory.txt
28
+
29
+ # Virtual Environments
30
+ .env
31
+ .venv
32
+ env/
33
+ venv/
34
+ ENV/
35
+ env.bak/
36
+ venv.bak/
37
+ pythonenv*
38
+
39
+ # PyCharm
40
+ .idea/
41
+ *.iml
42
+ *.iws
43
+ out/
44
+
45
+ # VS Code
46
+ .vscode/
47
+ *.code-workspace
48
+ .history/
49
+
50
+ # Jupyter Notebook
51
+ .ipynb_checkpoints
52
+ *.ipynb_checkpoints/
53
+
54
+ # pyenv
55
+ .python-version
56
+
57
+ # pipenv
58
+ Pipfile.lock
59
+
60
+ # poetry
61
+ poetry.lock
62
+
63
+ # Node.js
64
+ node_modules/
65
+ npm-debug.log*
66
+ yarn-debug.log*
67
+ yarn-error.log*
68
+ lerna-debug.log*
69
+ .pnpm-debug.log*
70
+ *.tsbuildinfo
71
+ .npm
72
+ .eslintcache
73
+ .node_repl_history
74
+ *.tgz
75
+ .yarn-integrity
76
+
77
+ # React/Vite
78
+ frontend/dist/
79
+ frontend/dist-ssr/
80
+ frontend/*.local
81
+ frontend/.vite/
82
+
83
+ # Environment variables
84
+ .env
85
+ .env.local
86
+ .env.development.local
87
+ .env.test.local
88
+ .env.production.local
89
+
90
+ # Logs
91
+ logs/
92
+ *.log
93
+ npm-debug.log*
94
+ yarn-debug.log*
95
+ yarn-error.log*
96
+ pnpm-debug.log*
97
+ lerna-debug.log*
98
+
99
+ # OS files
100
+ .DS_Store
101
+ .DS_Store?
102
+ ._*
103
+ .Spotlight-V100
104
+ .Trashes
105
+ ehthumbs.db
106
+ Thumbs.db
107
+ desktop.ini
108
+
109
+ # Testing
110
+ test-data/*.pdf
111
+ test-data/*.docx
112
+ test-data/*.pptx
113
+ htmlcov/
114
+ .tox/
115
+ .nox/
116
+ .coverage
117
+ .coverage.*
118
+ .cache
119
+ .pytest_cache/
120
+ cover/
121
+ *.cover
122
+ *.py,cover
123
+ .hypothesis/
124
+ nosetests.xml
125
+ coverage.xml
126
+ *.coveragerc
127
+ .dmypy.json
128
+ dmypy.json
129
+
130
+ # Database
131
+ *.db
132
+ *.sqlite
133
+ *.sqlite3
134
+ instance/
135
+
136
+ # Temporary files
137
+ *.tmp
138
+ *.bak
139
+ *.swp
140
+ *~.nib
141
+ *.orig
142
+ .temp/
143
+ tmp/
144
+ temp/
145
+
146
+ # Documentation builds
147
+ docs/_build/
148
+ site/
149
+
150
+ # Package files
151
+ *.jar
152
+ *.war
153
+ *.nar
154
+ *.ear
155
+ *.zip
156
+ *.tar.gz
157
+ *.rar
158
+
159
+ # Editor backups
160
+ *~
161
+ \#*\#
162
+ .\#*
163
+
164
+ # FastAPI
165
+ backend/instance/
166
+ backend/*.db
167
+
168
+ # Secrets
169
+ *.pem
170
+ *.key
171
+ *.crt
172
+ *.p12
173
+ secrets/
174
+
175
+ # Docker
176
+ .dockerignore
177
+ docker-compose.override.yml
178
+
179
+ # Production builds
180
+ build/
181
+ dist/
182
+
183
+ # Local development
184
+ .local/
185
+ scratch/
186
+
187
+ # Uploaded files (in production these would be handled differently)
188
+ uploads/
189
+ media/
190
+
191
+ # Static files collection
192
+ staticfiles/
193
+ static_root/