@exulu/backend 1.48.2 → 1.49.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +351 -42
- package/dist/index.d.cts +96 -1
- package/dist/index.d.ts +96 -1
- package/dist/index.js +340 -38
- package/ee/{markdown.ts → chunking/markdown.ts} +2 -2
- package/ee/python/README.md +295 -0
- package/ee/python/documents/processing/README.md +155 -0
- package/ee/{documents → python/documents}/processing/doc_processor.ts +25 -17
- package/ee/{documents/processing/pdf_to_markdown.py → python/documents/processing/document_to_markdown.py} +3 -10
- package/ee/python/setup.sh +180 -0
- package/package.json +14 -3
- package/scripts/postinstall.cjs +149 -0
- package/.agents/skills/mintlify/SKILL.md +0 -347
- package/.editorconfig +0 -15
- package/.eslintrc.json +0 -52
- package/.github/workflows/release-backend.yml +0 -38
- package/.husky/commit-msg +0 -1
- package/.jscpd.json +0 -18
- package/.mcp.json +0 -25
- package/.nvmrc +0 -1
- package/.prettierignore +0 -5
- package/.prettierrc.json +0 -12
- package/CHANGELOG.md +0 -8
- package/SECURITY.md +0 -5
- package/commitlint.config.js +0 -4
- package/devops/documentation/patch-older-releases.md +0 -42
- package/ee/documents/processing/build_pdf_processor.sh +0 -35
- package/ee/documents/processing/chunk_markdown.py +0 -263
- package/ee/documents/processing/pdf_processor.spec +0 -115
- package/eslint.config.js +0 -88
- package/jest.config.ts +0 -25
- package/mintlify-docs/.mintignore +0 -7
- package/mintlify-docs/AGENTS.md +0 -33
- package/mintlify-docs/CLAUDE.MD +0 -50
- package/mintlify-docs/CONTRIBUTING.md +0 -32
- package/mintlify-docs/LICENSE +0 -21
- package/mintlify-docs/README.md +0 -55
- package/mintlify-docs/ai-tools/claude-code.mdx +0 -43
- package/mintlify-docs/ai-tools/cursor.mdx +0 -39
- package/mintlify-docs/ai-tools/windsurf.mdx +0 -39
- package/mintlify-docs/api-reference/core-types/agent-types.mdx +0 -110
- package/mintlify-docs/api-reference/core-types/analytics-types.mdx +0 -95
- package/mintlify-docs/api-reference/core-types/configuration-types.mdx +0 -83
- package/mintlify-docs/api-reference/core-types/evaluation-types.mdx +0 -106
- package/mintlify-docs/api-reference/core-types/job-types.mdx +0 -135
- package/mintlify-docs/api-reference/core-types/overview.mdx +0 -73
- package/mintlify-docs/api-reference/core-types/prompt-types.mdx +0 -102
- package/mintlify-docs/api-reference/core-types/rbac-types.mdx +0 -163
- package/mintlify-docs/api-reference/core-types/session-types.mdx +0 -77
- package/mintlify-docs/api-reference/core-types/user-management.mdx +0 -112
- package/mintlify-docs/api-reference/core-types/workflow-types.mdx +0 -88
- package/mintlify-docs/api-reference/core-types.mdx +0 -585
- package/mintlify-docs/api-reference/dynamic-types.mdx +0 -851
- package/mintlify-docs/api-reference/endpoint/create.mdx +0 -4
- package/mintlify-docs/api-reference/endpoint/delete.mdx +0 -4
- package/mintlify-docs/api-reference/endpoint/get.mdx +0 -4
- package/mintlify-docs/api-reference/endpoint/webhook.mdx +0 -4
- package/mintlify-docs/api-reference/introduction.mdx +0 -661
- package/mintlify-docs/api-reference/mutations.mdx +0 -1012
- package/mintlify-docs/api-reference/openapi.json +0 -217
- package/mintlify-docs/api-reference/queries.mdx +0 -1154
- package/mintlify-docs/backend/introduction.mdx +0 -218
- package/mintlify-docs/changelog.mdx +0 -387
- package/mintlify-docs/community-edition.mdx +0 -304
- package/mintlify-docs/core/exulu-agent/api-reference.mdx +0 -894
- package/mintlify-docs/core/exulu-agent/configuration.mdx +0 -690
- package/mintlify-docs/core/exulu-agent/introduction.mdx +0 -552
- package/mintlify-docs/core/exulu-app/api-reference.mdx +0 -481
- package/mintlify-docs/core/exulu-app/configuration.mdx +0 -319
- package/mintlify-docs/core/exulu-app/introduction.mdx +0 -117
- package/mintlify-docs/core/exulu-authentication.mdx +0 -810
- package/mintlify-docs/core/exulu-chunkers/api-reference.mdx +0 -1011
- package/mintlify-docs/core/exulu-chunkers/configuration.mdx +0 -596
- package/mintlify-docs/core/exulu-chunkers/introduction.mdx +0 -403
- package/mintlify-docs/core/exulu-context/api-reference.mdx +0 -911
- package/mintlify-docs/core/exulu-context/configuration.mdx +0 -648
- package/mintlify-docs/core/exulu-context/introduction.mdx +0 -394
- package/mintlify-docs/core/exulu-database.mdx +0 -811
- package/mintlify-docs/core/exulu-default-agents.mdx +0 -545
- package/mintlify-docs/core/exulu-eval/api-reference.mdx +0 -772
- package/mintlify-docs/core/exulu-eval/configuration.mdx +0 -680
- package/mintlify-docs/core/exulu-eval/introduction.mdx +0 -459
- package/mintlify-docs/core/exulu-logging.mdx +0 -464
- package/mintlify-docs/core/exulu-otel.mdx +0 -670
- package/mintlify-docs/core/exulu-queues/api-reference.mdx +0 -648
- package/mintlify-docs/core/exulu-queues/configuration.mdx +0 -650
- package/mintlify-docs/core/exulu-queues/introduction.mdx +0 -474
- package/mintlify-docs/core/exulu-reranker/api-reference.mdx +0 -630
- package/mintlify-docs/core/exulu-reranker/configuration.mdx +0 -663
- package/mintlify-docs/core/exulu-reranker/introduction.mdx +0 -516
- package/mintlify-docs/core/exulu-tool/api-reference.mdx +0 -723
- package/mintlify-docs/core/exulu-tool/configuration.mdx +0 -805
- package/mintlify-docs/core/exulu-tool/introduction.mdx +0 -539
- package/mintlify-docs/core/exulu-variables/api-reference.mdx +0 -699
- package/mintlify-docs/core/exulu-variables/configuration.mdx +0 -736
- package/mintlify-docs/core/exulu-variables/introduction.mdx +0 -511
- package/mintlify-docs/development.mdx +0 -94
- package/mintlify-docs/docs.json +0 -248
- package/mintlify-docs/enterprise-edition.mdx +0 -538
- package/mintlify-docs/essentials/code.mdx +0 -35
- package/mintlify-docs/essentials/images.mdx +0 -59
- package/mintlify-docs/essentials/markdown.mdx +0 -88
- package/mintlify-docs/essentials/navigation.mdx +0 -87
- package/mintlify-docs/essentials/reusable-snippets.mdx +0 -110
- package/mintlify-docs/essentials/settings.mdx +0 -318
- package/mintlify-docs/favicon.svg +0 -3
- package/mintlify-docs/frontend/introduction.mdx +0 -39
- package/mintlify-docs/getting-started.mdx +0 -267
- package/mintlify-docs/guides/custom-agent.mdx +0 -608
- package/mintlify-docs/guides/first-agent.mdx +0 -315
- package/mintlify-docs/images/admin_ui.png +0 -0
- package/mintlify-docs/images/contexts.png +0 -0
- package/mintlify-docs/images/create_agents.png +0 -0
- package/mintlify-docs/images/evals.png +0 -0
- package/mintlify-docs/images/graphql.png +0 -0
- package/mintlify-docs/images/graphql_api.png +0 -0
- package/mintlify-docs/images/hero-dark.png +0 -0
- package/mintlify-docs/images/hero-light.png +0 -0
- package/mintlify-docs/images/hero.png +0 -0
- package/mintlify-docs/images/knowledge_sources.png +0 -0
- package/mintlify-docs/images/mcp.png +0 -0
- package/mintlify-docs/images/scaling.png +0 -0
- package/mintlify-docs/index.mdx +0 -411
- package/mintlify-docs/logo/dark.svg +0 -9
- package/mintlify-docs/logo/light.svg +0 -9
- package/mintlify-docs/partners.mdx +0 -558
- package/mintlify-docs/products.mdx +0 -77
- package/mintlify-docs/snippets/snippet-intro.mdx +0 -4
- package/mintlify-docs/styles.css +0 -207
- package/ngrok.bash +0 -1
- package/ngrok.md +0 -6
- package/ngrok.yml +0 -10
- package/release.config.cjs +0 -15
- package/skills-lock.json +0 -10
- package/types/context-processor.ts +0 -45
- package/types/enums/eval-types.ts +0 -5
- package/types/enums/field-types.ts +0 -1
- package/types/enums/jobs.ts +0 -11
- package/types/enums/statistics.ts +0 -13
- package/types/exulu-table-definition.ts +0 -79
- package/types/file-types.ts +0 -18
- package/types/models/agent-session.ts +0 -27
- package/types/models/agent.ts +0 -68
- package/types/models/context.ts +0 -53
- package/types/models/embedding.ts +0 -17
- package/types/models/eval-run.ts +0 -40
- package/types/models/exulu-agent-tool-config.ts +0 -11
- package/types/models/item.ts +0 -21
- package/types/models/job.ts +0 -8
- package/types/models/project.ts +0 -16
- package/types/models/rate-limiter-rules.ts +0 -7
- package/types/models/test-case.ts +0 -25
- package/types/models/tool.ts +0 -9
- package/types/models/user-role.ts +0 -12
- package/types/models/user.ts +0 -20
- package/types/models/variable.ts +0 -8
- package/types/models/vector-methods.ts +0 -7
- package/types/provider-config.ts +0 -21
- package/types/queue-config.ts +0 -16
- package/types/rbac-rights-modes.ts +0 -1
- package/types/statistics.ts +0 -20
- package/types/workflow.ts +0 -31
- /package/ee/{documents → python/documents}/THIRD_PARTY_LICENSES/docling.txt +0 -0
- /package/ee/{documents/processing → python}/requirements.txt +0 -0
|
@@ -1,263 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
"""
|
|
3
|
-
Markdown Chunker using Docling HybridChunker
|
|
4
|
-
Converts markdown files into chunks using Docling's hybrid chunking approach.
|
|
5
|
-
|
|
6
|
-
Usage:
|
|
7
|
-
chunk_markdown.py <markdown_file_path> [-o OUTPUT_PATH] [--max-tokens MAX_TOKENS]
|
|
8
|
-
"""
|
|
9
|
-
|
|
10
|
-
import sys
|
|
11
|
-
import os
|
|
12
|
-
import warnings
|
|
13
|
-
import argparse
|
|
14
|
-
import json
|
|
15
|
-
from pathlib import Path
|
|
16
|
-
|
|
17
|
-
# Suppress warnings
|
|
18
|
-
warnings.filterwarnings('ignore')
|
|
19
|
-
os.environ['PYTHONWARNINGS'] = 'ignore'
|
|
20
|
-
|
|
21
|
-
from docling.document_converter import DocumentConverter
|
|
22
|
-
from docling.chunking import HybridChunker
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
def find_page_for_chunk(chunk_text: str, pages_data: list) -> int:
|
|
26
|
-
"""
|
|
27
|
-
Find which page a chunk belongs to by matching text content.
|
|
28
|
-
|
|
29
|
-
Args:
|
|
30
|
-
chunk_text: The text content of the chunk
|
|
31
|
-
pages_data: List of page objects from index_validated.json
|
|
32
|
-
|
|
33
|
-
Returns:
|
|
34
|
-
Page number (1-indexed) or None if not found
|
|
35
|
-
"""
|
|
36
|
-
# Take first 100 characters for matching (remove extra whitespace)
|
|
37
|
-
search_text = ' '.join(chunk_text[:100].split())
|
|
38
|
-
|
|
39
|
-
# Search through all pages
|
|
40
|
-
for page in pages_data:
|
|
41
|
-
# Check both content and vlm_corrected_text
|
|
42
|
-
content_sources = [page.get('content', '')]
|
|
43
|
-
if page.get('vlm_corrected_text'):
|
|
44
|
-
content_sources.append(page['vlm_corrected_text'])
|
|
45
|
-
|
|
46
|
-
for content in content_sources:
|
|
47
|
-
# Normalize whitespace in content for comparison
|
|
48
|
-
normalized_content = ' '.join(content.split())
|
|
49
|
-
|
|
50
|
-
if search_text in normalized_content:
|
|
51
|
-
return page.get('page')
|
|
52
|
-
|
|
53
|
-
return None
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
def chunk_markdown_file(markdown_path: str, max_tokens: int = 512, output_path: str = None, use_markdown_tables: bool = True, index_json_path: str = None) -> dict:
|
|
57
|
-
"""
|
|
58
|
-
Chunk a markdown file using Docling's HybridChunker.
|
|
59
|
-
|
|
60
|
-
Args:
|
|
61
|
-
markdown_path: Path to the markdown file
|
|
62
|
-
max_tokens: Maximum number of tokens per chunk (default: 512)
|
|
63
|
-
output_path: Optional output path for JSON file
|
|
64
|
-
use_markdown_tables: Use markdown table format instead of triplets (default: True)
|
|
65
|
-
index_json_path: Optional path to index_validated.json for page mapping
|
|
66
|
-
|
|
67
|
-
Returns:
|
|
68
|
-
Dictionary containing chunks and metadata
|
|
69
|
-
"""
|
|
70
|
-
# Convert the markdown document
|
|
71
|
-
print(f"Converting markdown document: {markdown_path}", file=sys.stderr)
|
|
72
|
-
converter = DocumentConverter()
|
|
73
|
-
result = converter.convert(source=markdown_path)
|
|
74
|
-
doc = result.document
|
|
75
|
-
|
|
76
|
-
# Initialize the chunker with specified max_tokens and markdown tables enabled
|
|
77
|
-
print(f"Initializing HybridChunker with max_tokens={max_tokens}, use_markdown_tables={use_markdown_tables}", file=sys.stderr)
|
|
78
|
-
chunker = HybridChunker(
|
|
79
|
-
max_tokens=max_tokens,
|
|
80
|
-
use_markdown_tables=use_markdown_tables
|
|
81
|
-
)
|
|
82
|
-
|
|
83
|
-
# Load page data for page mapping if provided
|
|
84
|
-
pages_data = None
|
|
85
|
-
if index_json_path and Path(index_json_path).exists():
|
|
86
|
-
print(f"Loading page data from: {index_json_path}", file=sys.stderr)
|
|
87
|
-
with open(index_json_path, 'r', encoding='utf-8') as f:
|
|
88
|
-
pages_data = json.load(f)
|
|
89
|
-
|
|
90
|
-
# Chunk the document
|
|
91
|
-
print(f"Chunking document...", file=sys.stderr)
|
|
92
|
-
chunk_iter = chunker.chunk(dl_doc=doc)
|
|
93
|
-
|
|
94
|
-
# Process chunks and collect results
|
|
95
|
-
chunks = []
|
|
96
|
-
for i, chunk in enumerate(chunk_iter):
|
|
97
|
-
# Get the context-enriched text (recommended for embeddings)
|
|
98
|
-
enriched_text = chunker.contextualize(chunk=chunk)
|
|
99
|
-
|
|
100
|
-
# Extract heading hierarchy from chunk metadata
|
|
101
|
-
heading_hierarchy = []
|
|
102
|
-
if hasattr(chunk.meta, 'headings') and chunk.meta.headings:
|
|
103
|
-
heading_hierarchy = chunk.meta.headings if isinstance(chunk.meta.headings, list) else [chunk.meta.headings]
|
|
104
|
-
|
|
105
|
-
# Find page number by matching chunk text with pages data
|
|
106
|
-
page_number = None
|
|
107
|
-
if pages_data:
|
|
108
|
-
page_number = find_page_for_chunk(chunk.text, pages_data)
|
|
109
|
-
|
|
110
|
-
# Debug for first few chunks
|
|
111
|
-
if i < 3:
|
|
112
|
-
print(f"\nDEBUG Chunk {i}:", file=sys.stderr)
|
|
113
|
-
print(f" Text preview: {chunk.text[:100]}...", file=sys.stderr)
|
|
114
|
-
print(f" Heading hierarchy: {heading_hierarchy}", file=sys.stderr)
|
|
115
|
-
print(f" Page number: {page_number}", file=sys.stderr)
|
|
116
|
-
|
|
117
|
-
chunks.append({
|
|
118
|
-
"chunk_id": i,
|
|
119
|
-
"text": chunk.text,
|
|
120
|
-
"enriched_text": enriched_text,
|
|
121
|
-
"metadata": {
|
|
122
|
-
"page": page_number,
|
|
123
|
-
"path": getattr(chunk, 'path', None),
|
|
124
|
-
"headings": heading_hierarchy
|
|
125
|
-
}
|
|
126
|
-
})
|
|
127
|
-
|
|
128
|
-
# Return the results
|
|
129
|
-
return {
|
|
130
|
-
"source": markdown_path,
|
|
131
|
-
"total_chunks": len(chunks),
|
|
132
|
-
"max_tokens": max_tokens,
|
|
133
|
-
"chunks": chunks
|
|
134
|
-
}
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
def main():
|
|
138
|
-
"""Main entry point for the script."""
|
|
139
|
-
# Set up argument parser
|
|
140
|
-
parser = argparse.ArgumentParser(
|
|
141
|
-
description='Chunk markdown file using Docling HybridChunker',
|
|
142
|
-
formatter_class=argparse.RawDescriptionHelpFormatter
|
|
143
|
-
)
|
|
144
|
-
|
|
145
|
-
parser.add_argument(
|
|
146
|
-
'markdown_path',
|
|
147
|
-
type=str,
|
|
148
|
-
help='Path to the markdown file to chunk'
|
|
149
|
-
)
|
|
150
|
-
|
|
151
|
-
parser.add_argument(
|
|
152
|
-
'-o', '--output',
|
|
153
|
-
type=str,
|
|
154
|
-
dest='output_path',
|
|
155
|
-
help='Output path for the JSON file (default: same name as markdown with _chunks.json suffix)'
|
|
156
|
-
)
|
|
157
|
-
|
|
158
|
-
parser.add_argument(
|
|
159
|
-
'--max-tokens',
|
|
160
|
-
type=int,
|
|
161
|
-
dest='max_tokens',
|
|
162
|
-
default=512,
|
|
163
|
-
help='Maximum number of tokens per chunk (default: 512)'
|
|
164
|
-
)
|
|
165
|
-
|
|
166
|
-
parser.add_argument(
|
|
167
|
-
'--no-markdown-tables',
|
|
168
|
-
action='store_true',
|
|
169
|
-
dest='no_markdown_tables',
|
|
170
|
-
help='Disable markdown table format (use triplets instead)'
|
|
171
|
-
)
|
|
172
|
-
|
|
173
|
-
parser.add_argument(
|
|
174
|
-
'--index-json',
|
|
175
|
-
type=str,
|
|
176
|
-
dest='index_json_path',
|
|
177
|
-
help='Path to processed.json for page number mapping'
|
|
178
|
-
)
|
|
179
|
-
|
|
180
|
-
# Parse arguments
|
|
181
|
-
args = parser.parse_args()
|
|
182
|
-
|
|
183
|
-
markdown_path = args.markdown_path
|
|
184
|
-
output_path = args.output_path
|
|
185
|
-
max_tokens = args.max_tokens
|
|
186
|
-
use_markdown_tables = not args.no_markdown_tables
|
|
187
|
-
index_json_path = args.index_json_path
|
|
188
|
-
|
|
189
|
-
# Auto-detect index_validated.json if not provided
|
|
190
|
-
if not index_json_path:
|
|
191
|
-
# Try to find index_validated.json in the same directory as the markdown file
|
|
192
|
-
markdown_dir = Path(markdown_path).parent
|
|
193
|
-
potential_index = markdown_dir / 'processed.json'
|
|
194
|
-
if potential_index.exists():
|
|
195
|
-
index_json_path = str(potential_index)
|
|
196
|
-
print(f"Auto-detected index file: {index_json_path}", file=sys.stderr)
|
|
197
|
-
|
|
198
|
-
# Validate the file exists
|
|
199
|
-
if not Path(markdown_path).exists():
|
|
200
|
-
print(f"Error: File not found: {markdown_path}", file=sys.stderr)
|
|
201
|
-
sys.exit(1)
|
|
202
|
-
|
|
203
|
-
# Default: same name as markdown but with _chunks.json suffix
|
|
204
|
-
if not output_path:
|
|
205
|
-
markdown_file = Path(markdown_path)
|
|
206
|
-
output_path = str(markdown_file.parent / f"chunks.json")
|
|
207
|
-
else:
|
|
208
|
-
# If output_path is a directory, append default filename
|
|
209
|
-
output_path_obj = Path(output_path)
|
|
210
|
-
if output_path_obj.is_dir():
|
|
211
|
-
markdown_file = Path(markdown_path)
|
|
212
|
-
output_path = str(output_path_obj / f"chunks.json")
|
|
213
|
-
elif not output_path_obj.suffix:
|
|
214
|
-
# If no extension provided, treat as directory
|
|
215
|
-
output_path_obj.mkdir(exist_ok=True)
|
|
216
|
-
markdown_file = Path(markdown_path)
|
|
217
|
-
output_path = str(output_path_obj / f"chunks.json")
|
|
218
|
-
|
|
219
|
-
try:
|
|
220
|
-
# Chunk the markdown file
|
|
221
|
-
print(f"Processing markdown: {markdown_path}", file=sys.stderr)
|
|
222
|
-
print(f"Max tokens per chunk: {max_tokens}", file=sys.stderr)
|
|
223
|
-
print(f"Use markdown tables: {use_markdown_tables}", file=sys.stderr)
|
|
224
|
-
result = chunk_markdown_file(
|
|
225
|
-
markdown_path,
|
|
226
|
-
max_tokens=max_tokens,
|
|
227
|
-
output_path=output_path,
|
|
228
|
-
use_markdown_tables=use_markdown_tables,
|
|
229
|
-
index_json_path=index_json_path
|
|
230
|
-
)
|
|
231
|
-
|
|
232
|
-
# Save to JSON file
|
|
233
|
-
with open(output_path, 'w', encoding='utf-8') as f:
|
|
234
|
-
json.dump(result, f, indent=2, ensure_ascii=False)
|
|
235
|
-
f.flush()
|
|
236
|
-
|
|
237
|
-
print(f"\nSuccessfully saved {result['total_chunks']} chunks to: {output_path}", file=sys.stderr)
|
|
238
|
-
|
|
239
|
-
# Print stats
|
|
240
|
-
total_chars = sum(len(chunk['text']) for chunk in result['chunks'])
|
|
241
|
-
avg_chars = total_chars / len(result['chunks']) if result['chunks'] else 0
|
|
242
|
-
|
|
243
|
-
print(f"\nChunking stats:", file=sys.stderr)
|
|
244
|
-
print(f" Total chunks: {result['total_chunks']}", file=sys.stderr)
|
|
245
|
-
print(f" Total characters: {total_chars}", file=sys.stderr)
|
|
246
|
-
print(f" Average characters per chunk: {avg_chars:.0f}", file=sys.stderr)
|
|
247
|
-
|
|
248
|
-
sys.stderr.flush()
|
|
249
|
-
sys.stdout.flush()
|
|
250
|
-
|
|
251
|
-
# Exit cleanly
|
|
252
|
-
os._exit(0)
|
|
253
|
-
|
|
254
|
-
except Exception as e:
|
|
255
|
-
print(f"Error processing markdown: {str(e)}", file=sys.stderr)
|
|
256
|
-
import traceback
|
|
257
|
-
traceback.print_exc(file=sys.stderr)
|
|
258
|
-
sys.stderr.flush()
|
|
259
|
-
os._exit(1)
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
if __name__ == "__main__":
|
|
263
|
-
main()
|
|
@@ -1,115 +0,0 @@
|
|
|
1
|
-
# -*- mode: python ; coding: utf-8 -*-
|
|
2
|
-
from PyInstaller.utils.hooks import copy_metadata, collect_data_files
|
|
3
|
-
import os
|
|
4
|
-
|
|
5
|
-
block_cipher = None
|
|
6
|
-
|
|
7
|
-
# Collect package metadata for packages that need it
|
|
8
|
-
datas = []
|
|
9
|
-
datas += copy_metadata('docling')
|
|
10
|
-
datas += copy_metadata('docling-core')
|
|
11
|
-
datas += copy_metadata('docling-parse')
|
|
12
|
-
datas += copy_metadata('docling-ibm-models')
|
|
13
|
-
datas += copy_metadata('transformers')
|
|
14
|
-
datas += copy_metadata('torch')
|
|
15
|
-
datas += copy_metadata('tokenizers')
|
|
16
|
-
datas += copy_metadata('huggingface-hub')
|
|
17
|
-
datas += copy_metadata('pydantic')
|
|
18
|
-
datas += copy_metadata('pydantic-core')
|
|
19
|
-
|
|
20
|
-
# Collect data files from docling packages
|
|
21
|
-
datas += collect_data_files('docling_parse')
|
|
22
|
-
datas += collect_data_files('docling')
|
|
23
|
-
datas += collect_data_files('docling_core')
|
|
24
|
-
datas += collect_data_files('docling_ibm_models')
|
|
25
|
-
datas += collect_data_files('transformers')
|
|
26
|
-
|
|
27
|
-
# Collect all data files from docling and transformers packages
|
|
28
|
-
a = Analysis(
|
|
29
|
-
['pdf_processor.py'],
|
|
30
|
-
pathex=[],
|
|
31
|
-
binaries=[],
|
|
32
|
-
datas=datas,
|
|
33
|
-
hiddenimports=[
|
|
34
|
-
'docling',
|
|
35
|
-
'docling.document_converter',
|
|
36
|
-
'docling.chunking',
|
|
37
|
-
'docling.models',
|
|
38
|
-
'docling.models.plugins',
|
|
39
|
-
'docling.models.plugins.defaults',
|
|
40
|
-
'docling.backend',
|
|
41
|
-
'docling.backend.docling_parse_backend',
|
|
42
|
-
'docling.backend.asciidoc_backend',
|
|
43
|
-
'docling.backend.html_backend',
|
|
44
|
-
'docling.backend.md_backend',
|
|
45
|
-
'docling.backend.msexcel_backend',
|
|
46
|
-
'docling.backend.mspowerpoint_backend',
|
|
47
|
-
'docling.backend.msword_backend',
|
|
48
|
-
'docling.datamodel',
|
|
49
|
-
'docling.datamodel.document',
|
|
50
|
-
'docling_core',
|
|
51
|
-
'docling_core.transforms.chunker',
|
|
52
|
-
'docling_core.transforms.chunker.tokenizer',
|
|
53
|
-
'docling_core.transforms.chunker.tokenizer.huggingface',
|
|
54
|
-
'transformers',
|
|
55
|
-
'transformers.models',
|
|
56
|
-
'transformers.models.auto',
|
|
57
|
-
'torch',
|
|
58
|
-
'numpy',
|
|
59
|
-
'PIL',
|
|
60
|
-
'pdfplumber',
|
|
61
|
-
'pypdf',
|
|
62
|
-
'pikepdf',
|
|
63
|
-
'lxml',
|
|
64
|
-
'bs4',
|
|
65
|
-
'tiktoken',
|
|
66
|
-
'tokenizers',
|
|
67
|
-
'sentencepiece',
|
|
68
|
-
'safetensors',
|
|
69
|
-
'huggingface_hub',
|
|
70
|
-
'tqdm',
|
|
71
|
-
'regex',
|
|
72
|
-
'requests',
|
|
73
|
-
'urllib3',
|
|
74
|
-
'certifi',
|
|
75
|
-
'charset_normalizer',
|
|
76
|
-
'idna',
|
|
77
|
-
'packaging',
|
|
78
|
-
'filelock',
|
|
79
|
-
'pyyaml',
|
|
80
|
-
'jinja2',
|
|
81
|
-
'markupsafe',
|
|
82
|
-
],
|
|
83
|
-
hookspath=[],
|
|
84
|
-
hooksconfig={},
|
|
85
|
-
runtime_hooks=[],
|
|
86
|
-
excludes=[],
|
|
87
|
-
win_no_prefer_redirects=False,
|
|
88
|
-
win_private_assemblies=False,
|
|
89
|
-
cipher=block_cipher,
|
|
90
|
-
noarchive=False,
|
|
91
|
-
)
|
|
92
|
-
|
|
93
|
-
pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher)
|
|
94
|
-
|
|
95
|
-
exe = EXE(
|
|
96
|
-
pyz,
|
|
97
|
-
a.scripts,
|
|
98
|
-
a.binaries,
|
|
99
|
-
a.zipfiles,
|
|
100
|
-
a.datas,
|
|
101
|
-
[],
|
|
102
|
-
name='pdf_processor',
|
|
103
|
-
debug=False,
|
|
104
|
-
bootloader_ignore_signals=False,
|
|
105
|
-
strip=False,
|
|
106
|
-
upx=True,
|
|
107
|
-
upx_exclude=[],
|
|
108
|
-
runtime_tmpdir=None,
|
|
109
|
-
console=True,
|
|
110
|
-
disable_windowed_traceback=False,
|
|
111
|
-
argv_emulation=False,
|
|
112
|
-
target_arch=None,
|
|
113
|
-
codesign_identity=None,
|
|
114
|
-
entitlements_file=None,
|
|
115
|
-
)
|
package/eslint.config.js
DELETED
|
@@ -1,88 +0,0 @@
|
|
|
1
|
-
import tseslint from '@typescript-eslint/eslint-plugin';
|
|
2
|
-
import tsparser from '@typescript-eslint/parser';
|
|
3
|
-
import js from '@eslint/js';
|
|
4
|
-
|
|
5
|
-
export default [
|
|
6
|
-
{
|
|
7
|
-
ignores: ['dist/**', 'node_modules/**', '*.config.js', '*.config.cjs', '*.config.ts']
|
|
8
|
-
},
|
|
9
|
-
js.configs.recommended,
|
|
10
|
-
{
|
|
11
|
-
files: ['**/*.ts', '**/*.tsx'],
|
|
12
|
-
languageOptions: {
|
|
13
|
-
parser: tsparser,
|
|
14
|
-
parserOptions: {
|
|
15
|
-
ecmaVersion: 'latest',
|
|
16
|
-
sourceType: 'module',
|
|
17
|
-
project: './tsconfig.json'
|
|
18
|
-
},
|
|
19
|
-
globals: {
|
|
20
|
-
console: 'readonly',
|
|
21
|
-
process: 'readonly',
|
|
22
|
-
Buffer: 'readonly',
|
|
23
|
-
setTimeout: 'readonly',
|
|
24
|
-
clearTimeout: 'readonly',
|
|
25
|
-
setInterval: 'readonly',
|
|
26
|
-
clearInterval: 'readonly',
|
|
27
|
-
setImmediate: 'readonly',
|
|
28
|
-
clearImmediate: 'readonly',
|
|
29
|
-
__dirname: 'readonly',
|
|
30
|
-
__filename: 'readonly',
|
|
31
|
-
exports: 'writable',
|
|
32
|
-
module: 'writable',
|
|
33
|
-
require: 'readonly',
|
|
34
|
-
global: 'readonly',
|
|
35
|
-
performance: 'readonly',
|
|
36
|
-
TextDecoder: 'readonly',
|
|
37
|
-
TextEncoder: 'readonly'
|
|
38
|
-
}
|
|
39
|
-
},
|
|
40
|
-
plugins: {
|
|
41
|
-
'@typescript-eslint': tseslint
|
|
42
|
-
},
|
|
43
|
-
rules: {
|
|
44
|
-
...tseslint.configs.recommended.rules,
|
|
45
|
-
...tseslint.configs['recommended-requiring-type-checking'].rules,
|
|
46
|
-
'no-console': 'warn',
|
|
47
|
-
'no-extra-boolean-cast': 'off',
|
|
48
|
-
'no-async-promise-executor': 'off',
|
|
49
|
-
'no-useless-escape': 'off',
|
|
50
|
-
'@typescript-eslint/no-unused-vars': [
|
|
51
|
-
'error',
|
|
52
|
-
{
|
|
53
|
-
argsIgnorePattern: '^_',
|
|
54
|
-
varsIgnorePattern: '^_',
|
|
55
|
-
args: 'none'
|
|
56
|
-
}
|
|
57
|
-
],
|
|
58
|
-
'@typescript-eslint/explicit-function-return-type': [
|
|
59
|
-
'warn',
|
|
60
|
-
{
|
|
61
|
-
allowExpressions: true,
|
|
62
|
-
allowTypedFunctionExpressions: true
|
|
63
|
-
}
|
|
64
|
-
],
|
|
65
|
-
'@typescript-eslint/no-redundant-type-constituents': 'off',
|
|
66
|
-
'@typescript-eslint/no-explicit-any': 'off',
|
|
67
|
-
'@typescript-eslint/no-floating-promises': 'off',
|
|
68
|
-
'@typescript-eslint/await-thenable': 'off',
|
|
69
|
-
'@typescript-eslint/no-misused-promises': 'off',
|
|
70
|
-
'@typescript-eslint/unbound-method': 'off',
|
|
71
|
-
'@typescript-eslint/no-unnecessary-type-assertion': 'warn',
|
|
72
|
-
'@typescript-eslint/prefer-nullish-coalescing': 'warn',
|
|
73
|
-
'@typescript-eslint/prefer-optional-chain': 'warn',
|
|
74
|
-
'no-unused-expressions': 'off',
|
|
75
|
-
'@typescript-eslint/no-unused-expressions': 'error',
|
|
76
|
-
'require-await': 'off',
|
|
77
|
-
'no-case-declarations': 'off',
|
|
78
|
-
'@typescript-eslint/require-await': 'off',
|
|
79
|
-
'no-undef': 'off',
|
|
80
|
-
'@typescript-eslint/no-unsafe-call': 'off',
|
|
81
|
-
'@typescript-eslint/no-unsafe-member-access': 'off',
|
|
82
|
-
'@typescript-eslint/no-unsafe-assignment': 'off',
|
|
83
|
-
'@typescript-eslint/no-unsafe-argument': 'off',
|
|
84
|
-
'@typescript-eslint/no-unsafe-return': 'off',
|
|
85
|
-
'@typescript-eslint/ban-types': 'off'
|
|
86
|
-
}
|
|
87
|
-
}
|
|
88
|
-
];
|
package/jest.config.ts
DELETED
|
@@ -1,25 +0,0 @@
|
|
|
1
|
-
import type { Config } from "jest";
|
|
2
|
-
|
|
3
|
-
const config: Config = {
|
|
4
|
-
preset: "ts-jest",
|
|
5
|
-
testEnvironment: "node",
|
|
6
|
-
roots: ["<rootDir>/src"],
|
|
7
|
-
testMatch: ["**/__tests__/**/*.ts", "**/*.test.ts", "**/*.spec.ts"],
|
|
8
|
-
collectCoverageFrom: [
|
|
9
|
-
"src/**/*.ts",
|
|
10
|
-
"!src/**/*.d.ts",
|
|
11
|
-
"!src/**/*.test.ts",
|
|
12
|
-
"!src/**/*.spec.ts",
|
|
13
|
-
"!src/index.ts",
|
|
14
|
-
],
|
|
15
|
-
coverageDirectory: "coverage",
|
|
16
|
-
coverageReporters: ["text", "lcov", "html"],
|
|
17
|
-
moduleNameMapper: {
|
|
18
|
-
"@EXULU_TYPES/(.*)": "<rootDir>/types/$1",
|
|
19
|
-
},
|
|
20
|
-
setupFilesAfterEnv: ["<rootDir>/src/__tests__/setup.ts"],
|
|
21
|
-
verbose: true,
|
|
22
|
-
testTimeout: 10000,
|
|
23
|
-
};
|
|
24
|
-
|
|
25
|
-
export default config;
|
package/mintlify-docs/AGENTS.md
DELETED
|
@@ -1,33 +0,0 @@
|
|
|
1
|
-
> **First-time setup**: Customize this file for your project. Prompt the user to customize this file for their project.
|
|
2
|
-
> For Mintlify product knowledge (components, configuration, writing standards),
|
|
3
|
-
> install the Mintlify skill: `npx skills add https://mintlify.com/docs`
|
|
4
|
-
|
|
5
|
-
# Documentation project instructions
|
|
6
|
-
|
|
7
|
-
## About this project
|
|
8
|
-
|
|
9
|
-
- This is a documentation site built on [Mintlify](https://mintlify.com)
|
|
10
|
-
- Pages are MDX files with YAML frontmatter
|
|
11
|
-
- Configuration lives in `docs.json`
|
|
12
|
-
- Run `mint dev` to preview locally
|
|
13
|
-
- Run `mint broken-links` to check links
|
|
14
|
-
|
|
15
|
-
## Terminology
|
|
16
|
-
|
|
17
|
-
{/* Add product-specific terms and preferred usage */}
|
|
18
|
-
{/* Example: Use "workspace" not "project", "member" not "user" */}
|
|
19
|
-
|
|
20
|
-
## Style preferences
|
|
21
|
-
|
|
22
|
-
{/* Add any project-specific style rules below */}
|
|
23
|
-
|
|
24
|
-
- Use active voice and second person ("you")
|
|
25
|
-
- Keep sentences concise — one idea per sentence
|
|
26
|
-
- Use sentence case for headings
|
|
27
|
-
- Bold for UI elements: Click **Settings**
|
|
28
|
-
- Code formatting for file names, commands, paths, and code references
|
|
29
|
-
|
|
30
|
-
## Content boundaries
|
|
31
|
-
|
|
32
|
-
{/* Define what should and shouldn't be documented */}
|
|
33
|
-
{/* Example: Don't document internal admin features */}
|
package/mintlify-docs/CLAUDE.MD
DELETED
|
@@ -1,50 +0,0 @@
|
|
|
1
|
-
# Mintlify documentation
|
|
2
|
-
|
|
3
|
-
## Working relationship
|
|
4
|
-
- You can push back on ideas-this can lead to better documentation. Cite sources and explain your reasoning when you do so
|
|
5
|
-
- ALWAYS ask for clarification rather than making assumptions
|
|
6
|
-
- NEVER lie, guess, or make up anything
|
|
7
|
-
|
|
8
|
-
## Project context
|
|
9
|
-
- Format: MDX files with YAML frontmatter
|
|
10
|
-
- Config: docs.json for navigation, theme, settings
|
|
11
|
-
- Components: Mintlify components
|
|
12
|
-
|
|
13
|
-
## Content strategy
|
|
14
|
-
- Document just enough for user success - not too much, not too little
|
|
15
|
-
- Prioritize accuracy and usability
|
|
16
|
-
- Make content evergreen when possible
|
|
17
|
-
- Search for existing content before adding anything new. Avoid duplication unless it is done for a strategic reason
|
|
18
|
-
- Check existing patterns for consistency
|
|
19
|
-
- Start by making the smallest reasonable changes
|
|
20
|
-
|
|
21
|
-
## docs.json
|
|
22
|
-
|
|
23
|
-
- Refer to the [docs.json schema](https://mintlify.com/docs.json) when building the docs.json file and site navigation
|
|
24
|
-
|
|
25
|
-
## Frontmatter requirements for pages
|
|
26
|
-
- title: Clear, descriptive page title
|
|
27
|
-
- description: Concise summary for SEO/navigation
|
|
28
|
-
|
|
29
|
-
## Writing standards
|
|
30
|
-
- Second-person voice ("you")
|
|
31
|
-
- Prerequisites at start of procedural content
|
|
32
|
-
- Test all code examples before publishing
|
|
33
|
-
- Match style and formatting of existing pages
|
|
34
|
-
- Include both basic and advanced use cases
|
|
35
|
-
- Language tags on all code blocks
|
|
36
|
-
- Alt text on all images
|
|
37
|
-
- Relative paths for internal links
|
|
38
|
-
|
|
39
|
-
## Git workflow
|
|
40
|
-
- NEVER use --no-verify when committing
|
|
41
|
-
- Ask how to handle uncommitted changes before starting
|
|
42
|
-
- Create a new branch when no clear branch exists for changes
|
|
43
|
-
- Commit frequently throughout development
|
|
44
|
-
- NEVER skip or disable pre-commit hooks
|
|
45
|
-
|
|
46
|
-
## Do not
|
|
47
|
-
- Skip frontmatter on any MDX file
|
|
48
|
-
- Use absolute URLs for internal links
|
|
49
|
-
- Include untested code examples
|
|
50
|
-
- Make assumptions - always ask for clarification
|
|
@@ -1,32 +0,0 @@
|
|
|
1
|
-
# Contribute to the documentation
|
|
2
|
-
|
|
3
|
-
Thank you for your interest in contributing to our documentation! This guide will help you get started.
|
|
4
|
-
|
|
5
|
-
## How to contribute
|
|
6
|
-
|
|
7
|
-
### Option 1: Edit directly on GitHub
|
|
8
|
-
|
|
9
|
-
1. Navigate to the page you want to edit
|
|
10
|
-
2. Click the "Edit this file" button (the pencil icon)
|
|
11
|
-
3. Make your changes and submit a pull request
|
|
12
|
-
|
|
13
|
-
### Option 2: Local development
|
|
14
|
-
|
|
15
|
-
1. Fork and clone this repository
|
|
16
|
-
2. Install the Mintlify CLI: `npm i -g mint`
|
|
17
|
-
3. Create a branch for your changes
|
|
18
|
-
4. Make changes
|
|
19
|
-
5. Navigate to the docs directory (mintlify-docs) and run `mint dev`
|
|
20
|
-
6. Preview your changes at `http://localhost:3000`
|
|
21
|
-
7. Commit your changes and submit a pull request
|
|
22
|
-
|
|
23
|
-
For more details on local development, see our [development guide](development.mdx).
|
|
24
|
-
|
|
25
|
-
## Writing guidelines
|
|
26
|
-
|
|
27
|
-
- **Use active voice**: "Run the command" not "The command should be run"
|
|
28
|
-
- **Address the reader directly**: Use "you" instead of "the user"
|
|
29
|
-
- **Keep sentences concise**: Aim for one idea per sentence
|
|
30
|
-
- **Lead with the goal**: Start instructions with what the user wants to accomplish
|
|
31
|
-
- **Use consistent terminology**: Don't alternate between synonyms for the same concept
|
|
32
|
-
- **Include examples**: Show, don't just tell
|
package/mintlify-docs/LICENSE
DELETED
|
@@ -1,21 +0,0 @@
|
|
|
1
|
-
MIT License
|
|
2
|
-
|
|
3
|
-
Copyright (c) 2023 Mintlify
|
|
4
|
-
|
|
5
|
-
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
-
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
-
in the Software without restriction, including without limitation the rights
|
|
8
|
-
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
-
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
-
furnished to do so, subject to the following conditions:
|
|
11
|
-
|
|
12
|
-
The above copyright notice and this permission notice shall be included in all
|
|
13
|
-
copies or substantial portions of the Software.
|
|
14
|
-
|
|
15
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
-
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
-
SOFTWARE.
|