document-analyser 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- document_analyser-0.1.0/.env.example +85 -0
- document_analyser-0.1.0/.gitignore +193 -0
- document_analyser-0.1.0/AI-CONVERSATION.md +2185 -0
- document_analyser-0.1.0/CHARTLENS_STARTER.md +548 -0
- document_analyser-0.1.0/CLEANUP_SUMMARY.md +187 -0
- document_analyser-0.1.0/DEPLOYMENT.md +340 -0
- document_analyser-0.1.0/DOCUMENTLENS_SETUP.md +243 -0
- document_analyser-0.1.0/Dockerfile +98 -0
- document_analyser-0.1.0/IMPLEMENTATION_PLAN.md +215 -0
- document_analyser-0.1.0/LEGAL.md +177 -0
- document_analyser-0.1.0/LICENSE +21 -0
- document_analyser-0.1.0/PKG-INFO +178 -0
- document_analyser-0.1.0/README.md +149 -0
- document_analyser-0.1.0/REPOSITORY_SPLIT.sh +378 -0
- document_analyser-0.1.0/STARTUP_GUIDE.md +158 -0
- document_analyser-0.1.0/app/__init__.py +1 -0
- document_analyser-0.1.0/app/analyzers/__init__.py +1 -0
- document_analyser-0.1.0/app/analyzers/domain_mapper.py +173 -0
- document_analyser-0.1.0/app/analyzers/integrity_checker.py +386 -0
- document_analyser-0.1.0/app/analyzers/keyword_analyzer.py +159 -0
- document_analyser-0.1.0/app/analyzers/ner_analyzer.py +40 -0
- document_analyser-0.1.0/app/analyzers/ngram_analyzer.py +64 -0
- document_analyser-0.1.0/app/analyzers/readability.py +150 -0
- document_analyser-0.1.0/app/analyzers/sentiment_analyzer.py +279 -0
- document_analyser-0.1.0/app/analyzers/structural_mismatch.py +276 -0
- document_analyser-0.1.0/app/analyzers/word_analysis.py +289 -0
- document_analyser-0.1.0/app/analyzers/writing_quality.py +334 -0
- document_analyser-0.1.0/app/api/__init__.py +1 -0
- document_analyser-0.1.0/app/api/routes/__init__.py +1 -0
- document_analyser-0.1.0/app/api/routes/academic_analysis.py +373 -0
- document_analyser-0.1.0/app/api/routes/advanced_text.py +147 -0
- document_analyser-0.1.0/app/api/routes/analysis.py +205 -0
- document_analyser-0.1.0/app/api/routes/future_endpoints.py +913 -0
- document_analyser-0.1.0/app/api/routes/health.py +25 -0
- document_analyser-0.1.0/app/api/routes/semantic_analysis.py +148 -0
- document_analyser-0.1.0/app/api/routes/text_analysis.py +126 -0
- document_analyser-0.1.0/app/core/__init__.py +1 -0
- document_analyser-0.1.0/app/core/config.py +73 -0
- document_analyser-0.1.0/app/data/ai_patterns.json +146 -0
- document_analyser-0.1.0/app/main.py +115 -0
- document_analyser-0.1.0/app/models/__init__.py +1 -0
- document_analyser-0.1.0/app/models/schemas.py +318 -0
- document_analyser-0.1.0/app/services/__init__.py +1 -0
- document_analyser-0.1.0/app/services/document_processor.py +472 -0
- document_analyser-0.1.0/app/services/doi_resolver.py +327 -0
- document_analyser-0.1.0/app/services/reference_extractor.py +117 -0
- document_analyser-0.1.0/app/services/url_verifier.py +370 -0
- document_analyser-0.1.0/app/utils/__init__.py +1 -0
- document_analyser-0.1.0/deploy.sh +135 -0
- document_analyser-0.1.0/docker-compose.yml +75 -0
- document_analyser-0.1.0/docs/README.md +57 -0
- document_analyser-0.1.0/docs/architecture/INTEGRATION.md +476 -0
- document_analyser-0.1.0/docs/architecture/OVERVIEW.md +303 -0
- document_analyser-0.1.0/docs/integration-examples/python-sdk-integration.md +437 -0
- document_analyser-0.1.0/pyproject.toml +44 -0
- document_analyser-0.1.0/start.sh +50 -0
- document_analyser-0.1.0/test-data/.gitkeep +0 -0
- document_analyser-0.1.0/tests/__init__.py +1 -0
- document_analyser-0.1.0/tests/conftest.py +202 -0
- document_analyser-0.1.0/tests/test_academic_analysis.py +236 -0
- document_analyser-0.1.0/tests/test_files.py +399 -0
- document_analyser-0.1.0/tests/test_health.py +93 -0
- document_analyser-0.1.0/tests/test_semantic_analysis.py +165 -0
- document_analyser-0.1.0/tests/test_text_analysis.py +210 -0
- document_analyser-0.1.0/uv.lock +3412 -0
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
# DocumentLens Configuration Template
|
|
2
|
+
# Copy this to .env and update values for your deployment
|
|
3
|
+
|
|
4
|
+
# ===========================================
|
|
5
|
+
# 🇦🇺 AUSTRALIAN DOCUMENTLENS CONFIGURATION
|
|
6
|
+
# ===========================================
|
|
7
|
+
|
|
8
|
+
# Application Settings
|
|
9
|
+
DEBUG=false
|
|
10
|
+
HOST=0.0.0.0
|
|
11
|
+
PORT=8002
|
|
12
|
+
WORKERS=4
|
|
13
|
+
|
|
14
|
+
# Security (CRITICAL: Change in production!)
|
|
15
|
+
SECRET_KEY=change-this-super-secret-key-in-production-immediately
|
|
16
|
+
|
|
17
|
+
# File Processing Limits
|
|
18
|
+
MAX_FILE_SIZE=52428800 # 50MB in bytes
|
|
19
|
+
MAX_FILES_PER_REQUEST=10 # Maximum files per upload
|
|
20
|
+
|
|
21
|
+
# Rate Limiting (Australian-friendly)
|
|
22
|
+
RATE_LIMIT=30/minute # Requests per minute per IP
|
|
23
|
+
|
|
24
|
+
# CORS Settings
|
|
25
|
+
# For development only (allows requests from any origin)
|
|
26
|
+
ALLOWED_ORIGINS=*
|
|
27
|
+
|
|
28
|
+
# For production (specify your domains)
|
|
29
|
+
# ALLOWED_ORIGINS=https://document-lens.serveur.au,https://www.document-lens.serveur.au
|
|
30
|
+
|
|
31
|
+
# ===========================================
|
|
32
|
+
# 🔧 OPTIONAL ADVANCED SETTINGS
|
|
33
|
+
# ===========================================
|
|
34
|
+
|
|
35
|
+
# Logging Level
|
|
36
|
+
LOG_LEVEL=info # debug, info, warning, error
|
|
37
|
+
|
|
38
|
+
# Database (if needed in future)
|
|
39
|
+
# DATABASE_URL=postgresql://user:password@localhost/documentlens
|
|
40
|
+
|
|
41
|
+
# External Service URLs (if customized)
|
|
42
|
+
# DOI_RESOLVER_BASE_URL=https://api.crossref.org
|
|
43
|
+
# URL_VERIFY_TIMEOUT=30
|
|
44
|
+
|
|
45
|
+
# Cache Settings (if Redis added in future)
|
|
46
|
+
# REDIS_URL=redis://localhost:6379/0
|
|
47
|
+
# CACHE_TTL=3600
|
|
48
|
+
|
|
49
|
+
# ===========================================
|
|
50
|
+
# 📊 MONITORING & PERFORMANCE
|
|
51
|
+
# ===========================================
|
|
52
|
+
|
|
53
|
+
# Enable metrics endpoint
|
|
54
|
+
ENABLE_METRICS=true
|
|
55
|
+
|
|
56
|
+
# Request timeout (seconds)
|
|
57
|
+
REQUEST_TIMEOUT=60
|
|
58
|
+
|
|
59
|
+
# Maximum concurrent requests
|
|
60
|
+
MAX_CONCURRENT_REQUESTS=100
|
|
61
|
+
|
|
62
|
+
# ===========================================
|
|
63
|
+
# 🌐 DEPLOYMENT-SPECIFIC SETTINGS
|
|
64
|
+
# ===========================================
|
|
65
|
+
|
|
66
|
+
# For Docker deployments
|
|
67
|
+
PYTHONPATH=/app
|
|
68
|
+
|
|
69
|
+
# For systemd service deployments
|
|
70
|
+
# HOME=/opt/documentlens
|
|
71
|
+
# USER=www-data
|
|
72
|
+
|
|
73
|
+
# ===========================================
|
|
74
|
+
# 💡 NOTES
|
|
75
|
+
# ===========================================
|
|
76
|
+
#
|
|
77
|
+
# 1. This is a template - copy to .env and customize
|
|
78
|
+
# 2. Never commit .env to version control
|
|
79
|
+
# 3. SECRET_KEY should be a long, random string, e.g. generated with: python -c "import secrets; print(secrets.token_urlsafe(64))"
|
|
80
|
+
# 4. Adjust WORKERS based on your VPS resources:
|
|
81
|
+
# - 1GB RAM: WORKERS=2
|
|
82
|
+
# - 2GB RAM: WORKERS=4
|
|
83
|
+
# - 4GB+ RAM: WORKERS=8
|
|
84
|
+
# 5. For production, always use HTTPS and specific CORS origins
|
|
85
|
+
#
|
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
*.so
|
|
6
|
+
.Python
|
|
7
|
+
build/
|
|
8
|
+
develop-eggs/
|
|
9
|
+
dist/
|
|
10
|
+
downloads/
|
|
11
|
+
eggs/
|
|
12
|
+
.eggs/
|
|
13
|
+
lib/
|
|
14
|
+
lib64/
|
|
15
|
+
parts/
|
|
16
|
+
sdist/
|
|
17
|
+
var/
|
|
18
|
+
wheels/
|
|
19
|
+
share/python-wheels/
|
|
20
|
+
*.egg-info/
|
|
21
|
+
.installed.cfg
|
|
22
|
+
*.egg
|
|
23
|
+
MANIFEST
|
|
24
|
+
*.manifest
|
|
25
|
+
*.spec
|
|
26
|
+
pip-log.txt
|
|
27
|
+
pip-delete-this-directory.txt
|
|
28
|
+
|
|
29
|
+
# Virtual environments (the .env entry also keeps the local dotenv settings file out of version control)
|
|
30
|
+
.env
|
|
31
|
+
.venv
|
|
32
|
+
env/
|
|
33
|
+
venv/
|
|
34
|
+
ENV/
|
|
35
|
+
env.bak/
|
|
36
|
+
venv.bak/
|
|
37
|
+
pythonenv*
|
|
38
|
+
|
|
39
|
+
# PyCharm
|
|
40
|
+
.idea/
|
|
41
|
+
*.iml
|
|
42
|
+
*.iws
|
|
43
|
+
out/
|
|
44
|
+
|
|
45
|
+
# VS Code
|
|
46
|
+
.vscode/
|
|
47
|
+
*.code-workspace
|
|
48
|
+
.history/
|
|
49
|
+
|
|
50
|
+
# Jupyter Notebook
|
|
51
|
+
.ipynb_checkpoints
|
|
52
|
+
*.ipynb_checkpoints/
|
|
53
|
+
|
|
54
|
+
# pyenv
|
|
55
|
+
.python-version
|
|
56
|
+
|
|
57
|
+
# pipenv
|
|
58
|
+
Pipfile.lock
|
|
59
|
+
|
|
60
|
+
# poetry
|
|
61
|
+
poetry.lock
|
|
62
|
+
|
|
63
|
+
# Node.js
|
|
64
|
+
node_modules/
|
|
65
|
+
npm-debug.log*
|
|
66
|
+
yarn-debug.log*
|
|
67
|
+
yarn-error.log*
|
|
68
|
+
lerna-debug.log*
|
|
69
|
+
.pnpm-debug.log*
|
|
70
|
+
*.tsbuildinfo
|
|
71
|
+
.npm
|
|
72
|
+
.eslintcache
|
|
73
|
+
.node_repl_history
|
|
74
|
+
*.tgz
|
|
75
|
+
.yarn-integrity
|
|
76
|
+
|
|
77
|
+
# React/Vite
|
|
78
|
+
frontend/dist/
|
|
79
|
+
frontend/dist-ssr/
|
|
80
|
+
frontend/*.local
|
|
81
|
+
frontend/.vite/
|
|
82
|
+
|
|
83
|
+
# Environment variables
|
|
84
|
+
.env
|
|
85
|
+
.env.local
|
|
86
|
+
.env.development.local
|
|
87
|
+
.env.test.local
|
|
88
|
+
.env.production.local
|
|
89
|
+
|
|
90
|
+
# Logs
|
|
91
|
+
logs/
|
|
92
|
+
*.log
|
|
93
|
+
npm-debug.log*
|
|
94
|
+
yarn-debug.log*
|
|
95
|
+
yarn-error.log*
|
|
96
|
+
pnpm-debug.log*
|
|
97
|
+
lerna-debug.log*
|
|
98
|
+
|
|
99
|
+
# OS files
|
|
100
|
+
.DS_Store
|
|
101
|
+
.DS_Store?
|
|
102
|
+
._*
|
|
103
|
+
.Spotlight-V100
|
|
104
|
+
.Trashes
|
|
105
|
+
ehthumbs.db
|
|
106
|
+
Thumbs.db
|
|
107
|
+
desktop.ini
|
|
108
|
+
|
|
109
|
+
# Testing
|
|
110
|
+
test-data/*.pdf
|
|
111
|
+
test-data/*.docx
|
|
112
|
+
test-data/*.pptx
|
|
113
|
+
htmlcov/
|
|
114
|
+
.tox/
|
|
115
|
+
.nox/
|
|
116
|
+
.coverage
|
|
117
|
+
.coverage.*
|
|
118
|
+
.cache
|
|
119
|
+
.pytest_cache/
|
|
120
|
+
cover/
|
|
121
|
+
*.cover
|
|
122
|
+
*.py,cover
|
|
123
|
+
.hypothesis/
|
|
124
|
+
nosetests.xml
|
|
125
|
+
coverage.xml
|
|
126
|
+
*.coveragerc
|
|
127
|
+
.dmypy.json
|
|
128
|
+
dmypy.json
|
|
129
|
+
|
|
130
|
+
# Database
|
|
131
|
+
*.db
|
|
132
|
+
*.sqlite
|
|
133
|
+
*.sqlite3
|
|
134
|
+
instance/
|
|
135
|
+
|
|
136
|
+
# Temporary files
|
|
137
|
+
*.tmp
|
|
138
|
+
*.bak
|
|
139
|
+
*.swp
|
|
140
|
+
*~.nib
|
|
141
|
+
*.orig
|
|
142
|
+
.temp/
|
|
143
|
+
tmp/
|
|
144
|
+
temp/
|
|
145
|
+
|
|
146
|
+
# Documentation builds
|
|
147
|
+
docs/_build/
|
|
148
|
+
site/
|
|
149
|
+
|
|
150
|
+
# Package files
|
|
151
|
+
*.jar
|
|
152
|
+
*.war
|
|
153
|
+
*.nar
|
|
154
|
+
*.ear
|
|
155
|
+
*.zip
|
|
156
|
+
*.tar.gz
|
|
157
|
+
*.rar
|
|
158
|
+
|
|
159
|
+
# Editor backups
|
|
160
|
+
*~
|
|
161
|
+
\#*\#
|
|
162
|
+
.\#*
|
|
163
|
+
|
|
164
|
+
# FastAPI
|
|
165
|
+
backend/instance/
|
|
166
|
+
backend/*.db
|
|
167
|
+
|
|
168
|
+
# Secrets
|
|
169
|
+
*.pem
|
|
170
|
+
*.key
|
|
171
|
+
*.crt
|
|
172
|
+
*.p12
|
|
173
|
+
secrets/
|
|
174
|
+
|
|
175
|
+
# Docker
|
|
176
|
+
.dockerignore
|
|
177
|
+
docker-compose.override.yml
|
|
178
|
+
|
|
179
|
+
# Production builds
|
|
180
|
+
build/
|
|
181
|
+
dist/
|
|
182
|
+
|
|
183
|
+
# Local development
|
|
184
|
+
.local/
|
|
185
|
+
scratch/
|
|
186
|
+
|
|
187
|
+
# Uploaded files (in production these would be handled differently)
|
|
188
|
+
uploads/
|
|
189
|
+
media/
|
|
190
|
+
|
|
191
|
+
# Static files collection
|
|
192
|
+
staticfiles/
|
|
193
|
+
static_root/
|