layoutlm-forge 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- layoutlm_forge-0.1.0/LICENSE +21 -0
- layoutlm_forge-0.1.0/MANIFEST.in +6 -0
- layoutlm_forge-0.1.0/PKG-INFO +313 -0
- layoutlm_forge-0.1.0/README.md +262 -0
- layoutlm_forge-0.1.0/docs/api-reference.md +58 -0
- layoutlm_forge-0.1.0/docs/architecture.md +18 -0
- layoutlm_forge-0.1.0/docs/cli-reference.md +50 -0
- layoutlm_forge-0.1.0/docs/getting-started.md +42 -0
- layoutlm_forge-0.1.0/docs/index.md +23 -0
- layoutlm_forge-0.1.0/examples/basic_token_counting.py +32 -0
- layoutlm_forge-0.1.0/examples/chat_history_management.py +37 -0
- layoutlm_forge-0.1.0/examples/cost_optimization.py +39 -0
- layoutlm_forge-0.1.0/examples/document_ingestion_pipeline.py +50 -0
- layoutlm_forge-0.1.0/examples/rag_context_assembly.py +51 -0
- layoutlm_forge-0.1.0/pyproject.toml +95 -0
- layoutlm_forge-0.1.0/setup.cfg +4 -0
- layoutlm_forge-0.1.0/src/layoutlm_forge/__init__.py +43 -0
- layoutlm_forge-0.1.0/src/layoutlm_forge/api/__init__.py +1 -0
- layoutlm_forge-0.1.0/src/layoutlm_forge/api/app.py +66 -0
- layoutlm_forge-0.1.0/src/layoutlm_forge/api/routes/__init__.py +1 -0
- layoutlm_forge-0.1.0/src/layoutlm_forge/api/routes/chunker.py +47 -0
- layoutlm_forge-0.1.0/src/layoutlm_forge/api/routes/compression.py +37 -0
- layoutlm_forge-0.1.0/src/layoutlm_forge/api/routes/context.py +25 -0
- layoutlm_forge-0.1.0/src/layoutlm_forge/api/routes/cost.py +20 -0
- layoutlm_forge-0.1.0/src/layoutlm_forge/api/routes/tokenizer.py +41 -0
- layoutlm_forge-0.1.0/src/layoutlm_forge/api/schemas.py +94 -0
- layoutlm_forge-0.1.0/src/layoutlm_forge/chunker.py +375 -0
- layoutlm_forge-0.1.0/src/layoutlm_forge/cli/__init__.py +1 -0
- layoutlm_forge-0.1.0/src/layoutlm_forge/cli/main.py +280 -0
- layoutlm_forge-0.1.0/src/layoutlm_forge/compressor.py +427 -0
- layoutlm_forge-0.1.0/src/layoutlm_forge/context.py +334 -0
- layoutlm_forge-0.1.0/src/layoutlm_forge/cost.py +180 -0
- layoutlm_forge-0.1.0/src/layoutlm_forge/models.py +129 -0
- layoutlm_forge-0.1.0/src/layoutlm_forge/py.typed +1 -0
- layoutlm_forge-0.1.0/src/layoutlm_forge/tokenizer.py +237 -0
- layoutlm_forge-0.1.0/src/layoutlm_forge.egg-info/PKG-INFO +313 -0
- layoutlm_forge-0.1.0/src/layoutlm_forge.egg-info/SOURCES.txt +39 -0
- layoutlm_forge-0.1.0/src/layoutlm_forge.egg-info/dependency_links.txt +1 -0
- layoutlm_forge-0.1.0/src/layoutlm_forge.egg-info/entry_points.txt +2 -0
- layoutlm_forge-0.1.0/src/layoutlm_forge.egg-info/requires.txt +25 -0
- layoutlm_forge-0.1.0/src/layoutlm_forge.egg-info/top_level.txt +1 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Dhruv
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,313 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: layoutlm-forge
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Production-grade LLMOps infrastructure for context window management, token counting, document chunking, and compression
|
|
5
|
+
Author: Dhruv
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/dhruv-atomic-mui21/layoutlm-forge
|
|
8
|
+
Project-URL: Repository, https://github.com/dhruv-atomic-mui21/layoutlm-forge
|
|
9
|
+
Project-URL: Documentation, https://github.com/dhruv-atomic-mui21/layoutlm-forge/tree/main/docs
|
|
10
|
+
Project-URL: Bug Tracker, https://github.com/dhruv-atomic-mui21/layoutlm-forge/issues
|
|
11
|
+
Project-URL: Changelog, https://github.com/dhruv-atomic-mui21/layoutlm-forge/releases
|
|
12
|
+
Keywords: llm,tokens,tokenizer,chunking,context-window,context-management,llmops,rag,prompt-engineering,openai,anthropic
|
|
13
|
+
Classifier: Development Status :: 4 - Beta
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Operating System :: OS Independent
|
|
21
|
+
Classifier: Intended Audience :: Developers
|
|
22
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
23
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
24
|
+
Classifier: Typing :: Typed
|
|
25
|
+
Requires-Python: >=3.8
|
|
26
|
+
Description-Content-Type: text/markdown
|
|
27
|
+
License-File: LICENSE
|
|
28
|
+
Requires-Dist: tiktoken>=0.5.0
|
|
29
|
+
Requires-Dist: typer>=0.9.0
|
|
30
|
+
Requires-Dist: rich>=13.0.0
|
|
31
|
+
Provides-Extra: api
|
|
32
|
+
Requires-Dist: fastapi>=0.95.0; extra == "api"
|
|
33
|
+
Requires-Dist: uvicorn>=0.21.0; extra == "api"
|
|
34
|
+
Requires-Dist: pydantic>=2.0.0; extra == "api"
|
|
35
|
+
Provides-Extra: dev
|
|
36
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
37
|
+
Requires-Dist: pytest-cov; extra == "dev"
|
|
38
|
+
Requires-Dist: pytest-benchmark; extra == "dev"
|
|
39
|
+
Requires-Dist: ruff; extra == "dev"
|
|
40
|
+
Requires-Dist: mypy; extra == "dev"
|
|
41
|
+
Requires-Dist: httpx>=0.24.0; extra == "dev"
|
|
42
|
+
Requires-Dist: fastapi>=0.95.0; extra == "dev"
|
|
43
|
+
Requires-Dist: uvicorn>=0.21.0; extra == "dev"
|
|
44
|
+
Requires-Dist: pydantic>=2.0.0; extra == "dev"
|
|
45
|
+
Requires-Dist: build; extra == "dev"
|
|
46
|
+
Requires-Dist: twine; extra == "dev"
|
|
47
|
+
Provides-Extra: docs
|
|
48
|
+
Requires-Dist: mkdocs; extra == "docs"
|
|
49
|
+
Requires-Dist: mkdocs-material; extra == "docs"
|
|
50
|
+
Dynamic: license-file
|
|
51
|
+
|
|
52
|
+
<div align="center">
|
|
53
|
+
<h1>🔥 LayoutLM Forge</h1>
|
|
54
|
+
<p><b>Production-Grade LLMOps Infrastructure for Context Window Management</b></p>
|
|
55
|
+
<p><i>Deterministic token counting · Intelligent chunking · Priority-based context assembly · Cost estimation — the foundation every AI application needs.</i></p>
|
|
56
|
+
|
|
57
|
+
[](https://github.com/dhruv-atomic-mui21/layoutlm_forge/actions)
|
|
58
|
+
[](https://pypi.org/project/layoutlm_forge/)
|
|
59
|
+
[](https://pypi.org/project/layoutlm_forge/)
|
|
60
|
+
[](LICENSE)
|
|
61
|
+
[](https://pypi.org/project/layoutlm_forge/)
|
|
62
|
+
</div>
|
|
63
|
+
|
|
64
|
+
---
|
|
65
|
+
|
|
66
|
+
## Why LayoutLM Forge?
|
|
67
|
+
|
|
68
|
+
Every production AI application eventually hits the same infrastructure problems:
|
|
69
|
+
|
|
70
|
+
| Problem | Impact | LayoutLM Forge Solution |
|
|
71
|
+
|---------|--------|-----------------------|
|
|
72
|
+
| 🎯 Context window overflow | Silent failures, truncated responses | Priority-based assembly with overflow tracking |
|
|
73
|
+
| 📊 Inaccurate token counting | Budget overruns, dropped requests | Deterministic counting via tiktoken + heuristic fallbacks |
|
|
74
|
+
| 🔄 Naive text splitting | Broken semantics, degraded LLM reasoning | 5 chunking strategies (sentence, paragraph, semantic, code, fixed) |
|
|
75
|
+
| 💸 Unpredictable API costs | Surprise bills, no cost governance | Pre-flight cost estimation across 15+ models |
|
|
76
|
+
| 🗜️ Oversized prompts | Wasted tokens, slow responses | 4 compression strategies (extractive, truncate, middle-out, map-reduce) |
|
|
77
|
+
|
|
78
|
+
## Installation
|
|
79
|
+
|
|
80
|
+
```bash
|
|
81
|
+
pip install layoutlm_forge
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
With API server support:
|
|
85
|
+
```bash
|
|
86
|
+
pip install "layoutlm_forge[api]"
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
## Quick Start
|
|
90
|
+
|
|
91
|
+
### Token Counting
|
|
92
|
+
|
|
93
|
+
```python
|
|
94
|
+
from layoutlm_forge import TokenCounter
|
|
95
|
+
|
|
96
|
+
counter = TokenCounter("gpt-4o")
|
|
97
|
+
tokens = counter.count("Hello, world!")
|
|
98
|
+
print(f"Tokens: {tokens}") # Tokens: 4
|
|
99
|
+
|
|
100
|
+
# Check context window fit
|
|
101
|
+
fits = counter.fits_in_window("Your prompt...", reserve_output=500)
|
|
102
|
+
|
|
103
|
+
# Estimate cost before sending
|
|
104
|
+
cost = counter.estimate_cost("Your prompt...", direction="input")
|
|
105
|
+
print(f"Cost: ${cost:.6f}")
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
### Intelligent Chunking
|
|
109
|
+
|
|
110
|
+
```python
|
|
111
|
+
from layoutlm_forge import DocumentChunker, ChunkStrategy
|
|
112
|
+
|
|
113
|
+
chunker = DocumentChunker("gpt-4o")
|
|
114
|
+
|
|
115
|
+
# Chunk respecting paragraph boundaries
|
|
116
|
+
chunks = chunker.chunk(
|
|
117
|
+
long_document,
|
|
118
|
+
strategy=ChunkStrategy.PARAGRAPH,
|
|
119
|
+
max_tokens=500,
|
|
120
|
+
overlap_tokens=50,
|
|
121
|
+
)
|
|
122
|
+
|
|
123
|
+
# Specialized chunkers
|
|
124
|
+
code_chunks = chunker.chunk_code(source_code, language="python")
|
|
125
|
+
md_chunks = chunker.chunk_markdown(readme_text)
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
### Priority-Based Context Assembly
|
|
129
|
+
|
|
130
|
+
The core pattern for RAG applications — guarantee critical context fits while gracefully dropping lower-priority content:
|
|
131
|
+
|
|
132
|
+
```python
|
|
133
|
+
from layoutlm_forge import ContextWindow, Priority
|
|
134
|
+
|
|
135
|
+
window = ContextWindow("gpt-4o")
|
|
136
|
+
|
|
137
|
+
# System instructions — always included
|
|
138
|
+
window.add_block("You are a legal assistant.", Priority.CRITICAL, "system")
|
|
139
|
+
|
|
140
|
+
# User query — high priority
|
|
141
|
+
window.add_block("What is the statute of limitations?", Priority.HIGH, "query")
|
|
142
|
+
|
|
143
|
+
# RAG search results — included if space permits
|
|
144
|
+
window.add_block(search_result_1, Priority.MEDIUM, "rag_1")
|
|
145
|
+
window.add_block(search_result_2, Priority.LOW, "rag_2")
|
|
146
|
+
|
|
147
|
+
# Assemble: packs highest-priority blocks first
|
|
148
|
+
prompt = window.assemble(max_tokens=4096)
|
|
149
|
+
|
|
150
|
+
# See what was included/dropped
|
|
151
|
+
usage = window.usage()
|
|
152
|
+
print(f"Included: {usage['num_included']} blocks ({usage['included_tokens']} tokens)")
|
|
153
|
+
print(f"Dropped: {usage['num_excluded']} blocks")
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
### Cost Estimation
|
|
157
|
+
|
|
158
|
+
```python
|
|
159
|
+
from layoutlm_forge import CostCalculator
|
|
160
|
+
|
|
161
|
+
calc = CostCalculator("gpt-4o")
|
|
162
|
+
|
|
163
|
+
# Single prompt cost
|
|
164
|
+
cost = calc.estimate_prompt("Your prompt text here")
|
|
165
|
+
print(f"Input cost: ${cost.usd:.6f}")
|
|
166
|
+
|
|
167
|
+
# Compare models
|
|
168
|
+
comparison = calc.compare_models(
|
|
169
|
+
texts=["Document 1...", "Document 2..."],
|
|
170
|
+
models=["gpt-4o", "gpt-4o-mini", "claude-3.5-sonnet", "gemini-flash"],
|
|
171
|
+
)
|
|
172
|
+
for model, analysis in comparison.items():
|
|
173
|
+
print(f"{model}: ${analysis.total_usd:.6f} for {analysis.total_tokens} tokens")
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
### Context Compression
|
|
177
|
+
|
|
178
|
+
```python
|
|
179
|
+
from layoutlm_forge import ContextCompressor, CompressionStrategy
|
|
180
|
+
|
|
181
|
+
compressor = ContextCompressor("gpt-4o")
|
|
182
|
+
|
|
183
|
+
# Extractive: keeps most important sentences via TF-IDF scoring
|
|
184
|
+
result = compressor.compress(long_text, target_tokens=200)
|
|
185
|
+
print(f"Compressed: {result.original_tokens} → {result.compressed_tokens} tokens")
|
|
186
|
+
print(f"Savings: {result.savings_pct:.1f}%")
|
|
187
|
+
|
|
188
|
+
# Middle-out: preserves start and end, removes middle
|
|
189
|
+
result = compressor.compress(log_text, target_tokens=300, strategy=CompressionStrategy.MIDDLE_OUT)
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
### Conversation Management
|
|
193
|
+
|
|
194
|
+
```python
|
|
195
|
+
from layoutlm_forge import ConversationManager
|
|
196
|
+
|
|
197
|
+
manager = ConversationManager("gpt-4o")
|
|
198
|
+
|
|
199
|
+
manager.add_message("system", "You are a helpful Python tutor.")
|
|
200
|
+
manager.add_message("user", "Explain decorators")
|
|
201
|
+
manager.add_message("assistant", "Decorators are...")
|
|
202
|
+
# ... many more turns ...
|
|
203
|
+
|
|
204
|
+
# Auto-trim older messages to fit budget, preserving system prompt
|
|
205
|
+
trimmed = manager.get_context(max_tokens=4096, preserve_system=True)
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
## Supported Models
|
|
209
|
+
|
|
210
|
+
| Provider | Models | Token Counting | Pricing |
|
|
211
|
+
|----------|--------|:--------------:|:-------:|
|
|
212
|
+
| **OpenAI** | GPT-4, GPT-4 Turbo, GPT-4o, GPT-4o-mini, GPT-3.5 Turbo | ✅ tiktoken | ✅ |
|
|
213
|
+
| **Anthropic** | Claude 3 Opus, Claude 3.5 Sonnet, Claude 3 Haiku | ≈ estimate | ✅ |
|
|
214
|
+
| **Google** | Gemini Pro, Gemini Flash | ≈ estimate | ✅ |
|
|
215
|
+
| **Meta** | Llama 3 8B, Llama 3 70B | ≈ estimate | — |
|
|
216
|
+
| **Mistral** | Mistral Large | ≈ estimate | ✅ |
|
|
217
|
+
| **Cohere** | Command R+ | ≈ estimate | ✅ |
|
|
218
|
+
|
|
219
|
+
Register custom models:
|
|
220
|
+
```python
|
|
221
|
+
from layoutlm_forge import ModelRegistry, ModelInfo, TokenizerBackend
|
|
222
|
+
|
|
223
|
+
ModelRegistry.register(ModelInfo(
|
|
224
|
+
name="my-fine-tuned-model",
|
|
225
|
+
backend=TokenizerBackend.OPENAI,
|
|
226
|
+
context_window=16_384,
|
|
227
|
+
encoding_name="cl100k_base",
|
|
228
|
+
input_cost_per_1k=0.002,
|
|
229
|
+
output_cost_per_1k=0.006,
|
|
230
|
+
))
|
|
231
|
+
```
|
|
232
|
+
|
|
233
|
+
## CLI
|
|
234
|
+
|
|
235
|
+
```bash
|
|
236
|
+
# Count tokens
|
|
237
|
+
layoutlm_forge count "Hello world" --model gpt-4o
|
|
238
|
+
|
|
239
|
+
# Chunk a document
|
|
240
|
+
layoutlm_forge chunk document.md --strategy semantic --max-tokens 500
|
|
241
|
+
|
|
242
|
+
# Estimate cost
|
|
243
|
+
layoutlm_forge cost document.txt --model claude-3.5-sonnet
|
|
244
|
+
|
|
245
|
+
# List all models
|
|
246
|
+
layoutlm_forge models
|
|
247
|
+
|
|
248
|
+
# Health check
|
|
249
|
+
layoutlm_forge doctor
|
|
250
|
+
|
|
251
|
+
# Start API server
|
|
252
|
+
layoutlm_forge serve --port 8000
|
|
253
|
+
|
|
254
|
+
# Interactive demo
|
|
255
|
+
layoutlm_forge demo
|
|
256
|
+
```
|
|
257
|
+
|
|
258
|
+
## REST API
|
|
259
|
+
|
|
260
|
+
Start the server and access interactive docs at `http://localhost:8000/docs`:
|
|
261
|
+
|
|
262
|
+
```bash
|
|
263
|
+
pip install "layoutlm_forge[api]"
|
|
264
|
+
layoutlm_forge serve
|
|
265
|
+
```
|
|
266
|
+
|
|
267
|
+
| Endpoint | Method | Description |
|
|
268
|
+
|----------|--------|-------------|
|
|
269
|
+
| `/health` | GET | System health + version |
|
|
270
|
+
| `/api/v1/tokens/count` | POST | Count tokens |
|
|
271
|
+
| `/api/v1/tokens/validate` | POST | Check context window fit |
|
|
272
|
+
| `/api/v1/chunks/` | POST | Chunk text |
|
|
273
|
+
| `/api/v1/context/assemble` | POST | Priority-based assembly |
|
|
274
|
+
| `/api/v1/compress/` | POST | Compress text |
|
|
275
|
+
| `/api/v1/cost/estimate` | POST | Estimate cost |
|
|
276
|
+
|
|
277
|
+
## Architecture
|
|
278
|
+
|
|
279
|
+
```
|
|
280
|
+
layoutlm_forge/
|
|
281
|
+
├── models.py # Model registry (15+ models, pricing, backends)
|
|
282
|
+
├── tokenizer.py # Multi-provider token counter (tiktoken + heuristics)
|
|
283
|
+
├── chunker.py # 5-strategy document chunker with overlap
|
|
284
|
+
├── context.py # Priority-based context assembly + conversation manager
|
|
285
|
+
├── compressor.py # 4-strategy compression engine (TF-IDF, middle-out, etc.)
|
|
286
|
+
├── cost.py # Cost estimation engine with model comparison
|
|
287
|
+
├── cli/main.py # Typer CLI with Rich output
|
|
288
|
+
└── api/ # FastAPI server with versioned routes
|
|
289
|
+
```
|
|
290
|
+
|
|
291
|
+
## Docker
|
|
292
|
+
|
|
293
|
+
```bash
|
|
294
|
+
docker build -t layoutlm_forge .
|
|
295
|
+
docker-compose up
|
|
296
|
+
```
|
|
297
|
+
|
|
298
|
+
## Development
|
|
299
|
+
|
|
300
|
+
```bash
|
|
301
|
+
git clone https://github.com/dhruv-atomic-mui21/layoutlm-forge.git
|
|
302
|
+
cd layoutlm-forge
|
|
303
|
+
pip install -e ".[dev]"
|
|
304
|
+
pytest
|
|
305
|
+
```
|
|
306
|
+
|
|
307
|
+
## Contributing
|
|
308
|
+
|
|
309
|
+
See [CONTRIBUTING.md](CONTRIBUTING.md) for development workflow guidelines.
|
|
310
|
+
|
|
311
|
+
## License
|
|
312
|
+
|
|
313
|
+
MIT — see [LICENSE](LICENSE).
|
|
@@ -0,0 +1,262 @@
|
|
|
1
|
+
<div align="center">
|
|
2
|
+
<h1>🔥 LayoutLM Forge</h1>
|
|
3
|
+
<p><b>Production-Grade LLMOps Infrastructure for Context Window Management</b></p>
|
|
4
|
+
<p><i>Deterministic token counting · Intelligent chunking · Priority-based context assembly · Cost estimation — the foundation every AI application needs.</i></p>
|
|
5
|
+
|
|
6
|
+
[](https://github.com/dhruv-atomic-mui21/layoutlm_forge/actions)
|
|
7
|
+
[](https://pypi.org/project/layoutlm_forge/)
|
|
8
|
+
[](https://pypi.org/project/layoutlm_forge/)
|
|
9
|
+
[](LICENSE)
|
|
10
|
+
[](https://pypi.org/project/layoutlm_forge/)
|
|
11
|
+
</div>
|
|
12
|
+
|
|
13
|
+
---
|
|
14
|
+
|
|
15
|
+
## Why LayoutLM Forge?
|
|
16
|
+
|
|
17
|
+
Every production AI application eventually hits the same infrastructure problems:
|
|
18
|
+
|
|
19
|
+
| Problem | Impact | LayoutLM Forge Solution |
|
|
20
|
+
|---------|--------|-----------------------|
|
|
21
|
+
| 🎯 Context window overflow | Silent failures, truncated responses | Priority-based assembly with overflow tracking |
|
|
22
|
+
| 📊 Inaccurate token counting | Budget overruns, dropped requests | Deterministic counting via tiktoken + heuristic fallbacks |
|
|
23
|
+
| 🔄 Naive text splitting | Broken semantics, degraded LLM reasoning | 5 chunking strategies (sentence, paragraph, semantic, code, fixed) |
|
|
24
|
+
| 💸 Unpredictable API costs | Surprise bills, no cost governance | Pre-flight cost estimation across 15+ models |
|
|
25
|
+
| 🗜️ Oversized prompts | Wasted tokens, slow responses | 4 compression strategies (extractive, truncate, middle-out, map-reduce) |
|
|
26
|
+
|
|
27
|
+
## Installation
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
pip install layoutlm_forge
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
With API server support:
|
|
34
|
+
```bash
|
|
35
|
+
pip install "layoutlm_forge[api]"
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## Quick Start
|
|
39
|
+
|
|
40
|
+
### Token Counting
|
|
41
|
+
|
|
42
|
+
```python
|
|
43
|
+
from layoutlm_forge import TokenCounter
|
|
44
|
+
|
|
45
|
+
counter = TokenCounter("gpt-4o")
|
|
46
|
+
tokens = counter.count("Hello, world!")
|
|
47
|
+
print(f"Tokens: {tokens}") # Tokens: 4
|
|
48
|
+
|
|
49
|
+
# Check context window fit
|
|
50
|
+
fits = counter.fits_in_window("Your prompt...", reserve_output=500)
|
|
51
|
+
|
|
52
|
+
# Estimate cost before sending
|
|
53
|
+
cost = counter.estimate_cost("Your prompt...", direction="input")
|
|
54
|
+
print(f"Cost: ${cost:.6f}")
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
### Intelligent Chunking
|
|
58
|
+
|
|
59
|
+
```python
|
|
60
|
+
from layoutlm_forge import DocumentChunker, ChunkStrategy
|
|
61
|
+
|
|
62
|
+
chunker = DocumentChunker("gpt-4o")
|
|
63
|
+
|
|
64
|
+
# Chunk respecting paragraph boundaries
|
|
65
|
+
chunks = chunker.chunk(
|
|
66
|
+
long_document,
|
|
67
|
+
strategy=ChunkStrategy.PARAGRAPH,
|
|
68
|
+
max_tokens=500,
|
|
69
|
+
overlap_tokens=50,
|
|
70
|
+
)
|
|
71
|
+
|
|
72
|
+
# Specialized chunkers
|
|
73
|
+
code_chunks = chunker.chunk_code(source_code, language="python")
|
|
74
|
+
md_chunks = chunker.chunk_markdown(readme_text)
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
### Priority-Based Context Assembly
|
|
78
|
+
|
|
79
|
+
The core pattern for RAG applications — guarantee critical context fits while gracefully dropping lower-priority content:
|
|
80
|
+
|
|
81
|
+
```python
|
|
82
|
+
from layoutlm_forge import ContextWindow, Priority
|
|
83
|
+
|
|
84
|
+
window = ContextWindow("gpt-4o")
|
|
85
|
+
|
|
86
|
+
# System instructions — always included
|
|
87
|
+
window.add_block("You are a legal assistant.", Priority.CRITICAL, "system")
|
|
88
|
+
|
|
89
|
+
# User query — high priority
|
|
90
|
+
window.add_block("What is the statute of limitations?", Priority.HIGH, "query")
|
|
91
|
+
|
|
92
|
+
# RAG search results — included if space permits
|
|
93
|
+
window.add_block(search_result_1, Priority.MEDIUM, "rag_1")
|
|
94
|
+
window.add_block(search_result_2, Priority.LOW, "rag_2")
|
|
95
|
+
|
|
96
|
+
# Assemble: packs highest-priority blocks first
|
|
97
|
+
prompt = window.assemble(max_tokens=4096)
|
|
98
|
+
|
|
99
|
+
# See what was included/dropped
|
|
100
|
+
usage = window.usage()
|
|
101
|
+
print(f"Included: {usage['num_included']} blocks ({usage['included_tokens']} tokens)")
|
|
102
|
+
print(f"Dropped: {usage['num_excluded']} blocks")
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
### Cost Estimation
|
|
106
|
+
|
|
107
|
+
```python
|
|
108
|
+
from layoutlm_forge import CostCalculator
|
|
109
|
+
|
|
110
|
+
calc = CostCalculator("gpt-4o")
|
|
111
|
+
|
|
112
|
+
# Single prompt cost
|
|
113
|
+
cost = calc.estimate_prompt("Your prompt text here")
|
|
114
|
+
print(f"Input cost: ${cost.usd:.6f}")
|
|
115
|
+
|
|
116
|
+
# Compare models
|
|
117
|
+
comparison = calc.compare_models(
|
|
118
|
+
texts=["Document 1...", "Document 2..."],
|
|
119
|
+
models=["gpt-4o", "gpt-4o-mini", "claude-3.5-sonnet", "gemini-flash"],
|
|
120
|
+
)
|
|
121
|
+
for model, analysis in comparison.items():
|
|
122
|
+
print(f"{model}: ${analysis.total_usd:.6f} for {analysis.total_tokens} tokens")
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
### Context Compression
|
|
126
|
+
|
|
127
|
+
```python
|
|
128
|
+
from layoutlm_forge import ContextCompressor, CompressionStrategy
|
|
129
|
+
|
|
130
|
+
compressor = ContextCompressor("gpt-4o")
|
|
131
|
+
|
|
132
|
+
# Extractive: keeps most important sentences via TF-IDF scoring
|
|
133
|
+
result = compressor.compress(long_text, target_tokens=200)
|
|
134
|
+
print(f"Compressed: {result.original_tokens} → {result.compressed_tokens} tokens")
|
|
135
|
+
print(f"Savings: {result.savings_pct:.1f}%")
|
|
136
|
+
|
|
137
|
+
# Middle-out: preserves start and end, removes middle
|
|
138
|
+
result = compressor.compress(log_text, target_tokens=300, strategy=CompressionStrategy.MIDDLE_OUT)
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
### Conversation Management
|
|
142
|
+
|
|
143
|
+
```python
|
|
144
|
+
from layoutlm_forge import ConversationManager
|
|
145
|
+
|
|
146
|
+
manager = ConversationManager("gpt-4o")
|
|
147
|
+
|
|
148
|
+
manager.add_message("system", "You are a helpful Python tutor.")
|
|
149
|
+
manager.add_message("user", "Explain decorators")
|
|
150
|
+
manager.add_message("assistant", "Decorators are...")
|
|
151
|
+
# ... many more turns ...
|
|
152
|
+
|
|
153
|
+
# Auto-trim older messages to fit budget, preserving system prompt
|
|
154
|
+
trimmed = manager.get_context(max_tokens=4096, preserve_system=True)
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
## Supported Models
|
|
158
|
+
|
|
159
|
+
| Provider | Models | Token Counting | Pricing |
|
|
160
|
+
|----------|--------|:--------------:|:-------:|
|
|
161
|
+
| **OpenAI** | GPT-4, GPT-4 Turbo, GPT-4o, GPT-4o-mini, GPT-3.5 Turbo | ✅ tiktoken | ✅ |
|
|
162
|
+
| **Anthropic** | Claude 3 Opus, Claude 3.5 Sonnet, Claude 3 Haiku | ≈ estimate | ✅ |
|
|
163
|
+
| **Google** | Gemini Pro, Gemini Flash | ≈ estimate | ✅ |
|
|
164
|
+
| **Meta** | Llama 3 8B, Llama 3 70B | ≈ estimate | — |
|
|
165
|
+
| **Mistral** | Mistral Large | ≈ estimate | ✅ |
|
|
166
|
+
| **Cohere** | Command R+ | ≈ estimate | ✅ |
|
|
167
|
+
|
|
168
|
+
Register custom models:
|
|
169
|
+
```python
|
|
170
|
+
from layoutlm_forge import ModelRegistry, ModelInfo, TokenizerBackend
|
|
171
|
+
|
|
172
|
+
ModelRegistry.register(ModelInfo(
|
|
173
|
+
name="my-fine-tuned-model",
|
|
174
|
+
backend=TokenizerBackend.OPENAI,
|
|
175
|
+
context_window=16_384,
|
|
176
|
+
encoding_name="cl100k_base",
|
|
177
|
+
input_cost_per_1k=0.002,
|
|
178
|
+
output_cost_per_1k=0.006,
|
|
179
|
+
))
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
## CLI
|
|
183
|
+
|
|
184
|
+
```bash
|
|
185
|
+
# Count tokens
|
|
186
|
+
layoutlm_forge count "Hello world" --model gpt-4o
|
|
187
|
+
|
|
188
|
+
# Chunk a document
|
|
189
|
+
layoutlm_forge chunk document.md --strategy semantic --max-tokens 500
|
|
190
|
+
|
|
191
|
+
# Estimate cost
|
|
192
|
+
layoutlm_forge cost document.txt --model claude-3.5-sonnet
|
|
193
|
+
|
|
194
|
+
# List all models
|
|
195
|
+
layoutlm_forge models
|
|
196
|
+
|
|
197
|
+
# Health check
|
|
198
|
+
layoutlm_forge doctor
|
|
199
|
+
|
|
200
|
+
# Start API server
|
|
201
|
+
layoutlm_forge serve --port 8000
|
|
202
|
+
|
|
203
|
+
# Interactive demo
|
|
204
|
+
layoutlm_forge demo
|
|
205
|
+
```
|
|
206
|
+
|
|
207
|
+
## REST API
|
|
208
|
+
|
|
209
|
+
Start the server and access interactive docs at `http://localhost:8000/docs`:
|
|
210
|
+
|
|
211
|
+
```bash
|
|
212
|
+
pip install "layoutlm_forge[api]"
|
|
213
|
+
layoutlm_forge serve
|
|
214
|
+
```
|
|
215
|
+
|
|
216
|
+
| Endpoint | Method | Description |
|
|
217
|
+
|----------|--------|-------------|
|
|
218
|
+
| `/health` | GET | System health + version |
|
|
219
|
+
| `/api/v1/tokens/count` | POST | Count tokens |
|
|
220
|
+
| `/api/v1/tokens/validate` | POST | Check context window fit |
|
|
221
|
+
| `/api/v1/chunks/` | POST | Chunk text |
|
|
222
|
+
| `/api/v1/context/assemble` | POST | Priority-based assembly |
|
|
223
|
+
| `/api/v1/compress/` | POST | Compress text |
|
|
224
|
+
| `/api/v1/cost/estimate` | POST | Estimate cost |
|
|
225
|
+
|
|
226
|
+
## Architecture
|
|
227
|
+
|
|
228
|
+
```
|
|
229
|
+
layoutlm_forge/
|
|
230
|
+
├── models.py # Model registry (15+ models, pricing, backends)
|
|
231
|
+
├── tokenizer.py # Multi-provider token counter (tiktoken + heuristics)
|
|
232
|
+
├── chunker.py # 5-strategy document chunker with overlap
|
|
233
|
+
├── context.py # Priority-based context assembly + conversation manager
|
|
234
|
+
├── compressor.py # 4-strategy compression engine (TF-IDF, middle-out, etc.)
|
|
235
|
+
├── cost.py # Cost estimation engine with model comparison
|
|
236
|
+
├── cli/main.py # Typer CLI with Rich output
|
|
237
|
+
└── api/ # FastAPI server with versioned routes
|
|
238
|
+
```
|
|
239
|
+
|
|
240
|
+
## Docker
|
|
241
|
+
|
|
242
|
+
```bash
|
|
243
|
+
docker build -t layoutlm_forge .
|
|
244
|
+
docker-compose up
|
|
245
|
+
```
|
|
246
|
+
|
|
247
|
+
## Development
|
|
248
|
+
|
|
249
|
+
```bash
|
|
250
|
+
git clone https://github.com/dhruv-atomic-mui21/layoutlm-forge.git
|
|
251
|
+
cd layoutlm-forge
|
|
252
|
+
pip install -e ".[dev]"
|
|
253
|
+
pytest
|
|
254
|
+
```
|
|
255
|
+
|
|
256
|
+
## Contributing
|
|
257
|
+
|
|
258
|
+
See [CONTRIBUTING.md](CONTRIBUTING.md) for development workflow guidelines.
|
|
259
|
+
|
|
260
|
+
## License
|
|
261
|
+
|
|
262
|
+
MIT — see [LICENSE](LICENSE).
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
# API Reference
|
|
2
|
+
|
|
3
|
+
LayoutLM Forge includes a FastAPI application to serve these tools over HTTP.
|
|
4
|
+
|
|
5
|
+
By default, the server runs on port 8000. Start it via CLI:
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
layoutlm_forge serve --port 8000
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Endpoints
|
|
12
|
+
|
|
13
|
+
### `GET /health`
|
|
14
|
+
Returns system health, version, and the number of supported model providers.
|
|
15
|
+
|
|
16
|
+
### `POST /api/v1/tokens/count`
|
|
17
|
+
Count tokens for a given string.
|
|
18
|
+
|
|
19
|
+
**Request:**
|
|
20
|
+
```json
|
|
21
|
+
{
|
|
22
|
+
"text": "Hello world!",
|
|
23
|
+
"model": "gpt-4o"
|
|
24
|
+
}
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
### `POST /api/v1/cost/estimate`
|
|
28
|
+
Estimate input cost for processing text.
|
|
29
|
+
|
|
30
|
+
**Request:**
|
|
31
|
+
```json
|
|
32
|
+
{
|
|
33
|
+
"text": "Hello world!",
|
|
34
|
+
"model": "gpt-4o"
|
|
35
|
+
}
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
### `POST /api/v1/chunks/`
|
|
39
|
+
Chunk text with strategies like `paragraph`, `sentence`, `fixed`, `semantic`, and `code`.
|
|
40
|
+
|
|
41
|
+
### `POST /api/v1/context/assemble`
|
|
42
|
+
Pass a list of priority-labelled blocks.
|
|
43
|
+
|
|
44
|
+
**Request:**
|
|
45
|
+
```json
|
|
46
|
+
{
|
|
47
|
+
"blocks": [
|
|
48
|
+
{"content": "System prompt", "priority": "CRITICAL"},
|
|
49
|
+
{"content": "Irrelevant chat", "priority": "LOW"}
|
|
50
|
+
],
|
|
51
|
+
"max_tokens": 100
|
|
52
|
+
}
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
### `POST /api/v1/compress/`
|
|
56
|
+
Compress long text into a specific target budget.
|
|
57
|
+
|
|
58
|
+
**Strategies**: `extractive`, `truncate`, `middle_out`, `map_reduce`
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# Architecture
|
|
2
|
+
|
|
3
|
+
LayoutLM Forge is split into modular components that can be used independently or together through the API and CLI.
|
|
4
|
+
|
|
5
|
+
## Core Modules
|
|
6
|
+
|
|
7
|
+
1. **`layoutlm_forge.models`**: Stores model metadata, cost schema, and backend mapping (OpenAI, Anthropic, Google, etc.).
|
|
8
|
+
2. **`layoutlm_forge.tokenizer`**: Uses `tiktoken` for OpenAI models and heuristic estimations for others to rapidly count tokens.
|
|
9
|
+
3. **`layoutlm_forge.chunker`**: Uses regex patterns and token counting to chunk documents along semantic boundaries (paragraphs, markdown nodes, functions/classes).
|
|
10
|
+
4. **`layoutlm_forge.context`**: A priority-queue-style greedy packer: it guarantees that `CRITICAL` and `HIGH` priority blocks fit inside the window before packing `LOW` priority blocks.
|
|
11
|
+
5. **`layoutlm_forge.compressor`**: Token budget optimizer. Uses heuristics such as TF-IDF scoring or simple map-reduce truncation to fit contexts into tighter token budgets.
|
|
12
|
+
|
|
13
|
+
## Flow
|
|
14
|
+
|
|
15
|
+
1. You **Chunk** a large document.
|
|
16
|
+
2. You feed the chunks into the **Context** assembler, adding priority tags (so that old data is dropped if token limit is reached).
|
|
17
|
+
3. If necessary, you run **Compression** on the chunks to squeeze out more space.
|
|
18
|
+
4. Before issuing the API request to the LLM, you use **Cost** to estimate the total expense of your context string.
|