@tfw.in/structura-lib 0.2.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. package/README.md +72 -323
  2. package/dist/cjs/EditableContent.js +46 -18
  3. package/dist/cjs/HtmlViewer.js +238 -85
  4. package/dist/cjs/MathRenderer.js +88 -0
  5. package/dist/cjs/PdfDocumentViewer.js +1 -1
  6. package/dist/cjs/SemanticTagParser.js +189 -0
  7. package/dist/cjs/SemanticTagRenderer.js +135 -0
  8. package/dist/cjs/Structura.js +49 -76
  9. package/dist/cjs/Table.js +75 -8
  10. package/dist/cjs/TableCell.js +34 -10
  11. package/dist/cjs/index.js +12 -0
  12. package/dist/cjs/node_modules/react-icons/fa/index.esm.js +6 -0
  13. package/dist/cjs/styles.css +2 -4
  14. package/dist/cjs/styles.css.map +1 -1
  15. package/dist/esm/EditableContent.js +51 -19
  16. package/dist/esm/HtmlViewer.js +287 -103
  17. package/dist/esm/MathRenderer.js +85 -0
  18. package/dist/esm/PdfDocumentViewer.js +1 -1
  19. package/dist/esm/SemanticTagParser.js +187 -0
  20. package/dist/esm/SemanticTagRenderer.js +140 -0
  21. package/dist/esm/Structura.js +57 -80
  22. package/dist/esm/Table.js +85 -8
  23. package/dist/esm/TableCell.js +34 -6
  24. package/dist/esm/index.js +3 -0
  25. package/dist/esm/node_modules/react-icons/fa/index.esm.js +5 -1
  26. package/dist/esm/styles.css +2 -4
  27. package/dist/esm/styles.css.map +1 -1
  28. package/dist/esm/types/DocumentOutline.d.ts +7 -0
  29. package/dist/esm/types/EditableContent.d.ts +8 -1
  30. package/dist/esm/types/HtmlViewer.d.ts +9 -2
  31. package/dist/esm/types/MathRenderer.d.ts +25 -0
  32. package/dist/esm/types/SemanticTagParser.d.ts +33 -0
  33. package/dist/esm/types/SemanticTagRenderer.d.ts +17 -0
  34. package/dist/esm/types/Structura.d.ts +13 -8
  35. package/dist/esm/types/Table.d.ts +4 -1
  36. package/dist/esm/types/TableCell.d.ts +7 -1
  37. package/dist/esm/types/helpers/index.d.ts +0 -1
  38. package/dist/esm/types/index.d.ts +3 -0
  39. package/dist/esm/types/test-app/src/App.d.ts +1 -2
  40. package/dist/index.d.ts +90 -10
  41. package/package.json +9 -16
  42. package/PRODUCTION_ARCHITECTURE.md +0 -511
  43. package/SAVE_FUNCTIONALITY_COMPLETE.md +0 -448
  44. package/dist/cjs/ui/badge.js +0 -34
  45. package/dist/esm/types/helpers/jsonToHtml.d.ts +0 -40
  46. package/dist/esm/ui/badge.js +0 -31
  47. package/server/README.md +0 -203
  48. package/server/db.js +0 -142
  49. package/server/server.js +0 -165
@@ -1,448 +0,0 @@
1
- # Save Functionality - Production Implementation
2
-
3
- ## Overview
4
-
5
- A production-ready, scalable save system that automatically handles any PDF/JSON combination without pre-configuration.
6
-
7
- ## Key Design Principles
8
-
9
- ✅ **Zero Configuration**: No manual database setup or seeding required
10
- ✅ **Auto-initialization**: Documents created automatically on first save
11
- ✅ **Version Tracking**: Full edit history preserved
12
- ✅ **Scalability**: Works with any PDF/JSON combination
13
- ✅ **Baseline Preservation**: Original JSON stored for diffs
14
-
15
- ## Architecture
16
-
17
- ```
18
- User → Load PDF + JSON → Edit Content → Save
19
-
20
-
21
- First Save?
22
-
23
- ┌─────────────┴─────────────┐
24
- │ │
25
- YES NO
26
- │ │
27
- ▼ ▼
28
- Create document with Find existing document
29
- originalJson baseline
30
- │ │
31
- └─────────────┬─────────────┘
32
-
33
-
34
- Save edit with editedJson
35
-
36
-
37
- Store in SQLite with timestamp
38
- ```
39
-
40
- ## How It Works
41
-
42
- ### First Save Flow
43
-
44
- 1. **User loads document**
45
- ```
46
- http://localhost:5175/?pdf=/doc.pdf&json=/data.json
47
- ```
48
-
49
- 2. **Frontend loads JSON**
50
- - `mockData`: Current state for editing
51
- - `originalData`: Baseline for database
52
-
53
- 3. **User makes edits and clicks Save**
54
-
55
- 4. **Frontend sends to server**
56
- ```json
57
- {
58
- "pdfName": "doc.pdf",
59
- "editedJson": { /* edited state */ },
60
- "originalJson": { /* original baseline */ }
61
- }
62
- ```
63
-
64
- 5. **Backend creates document**
65
- - Stores originalJson in `documents` table
66
- - Stores editedJson in `edits` table
67
- - Returns document ID + edit ID
68
-
69
- 6. **Frontend clears originalData**
70
- - Subsequent saves only send editedJson
71
-
72
- ### Subsequent Save Flow
73
-
74
- 1. **User makes more edits and clicks Save**
75
-
76
- 2. **Frontend sends to server**
77
- ```json
78
- {
79
- "pdfName": "doc.pdf",
80
- "editedJson": { /* new state */ }
81
- }
82
- ```
83
-
84
- 3. **Backend finds existing document**
85
- - Creates new entry in `edits` table
86
- - Returns document ID + edit ID
87
-
88
- ## API Endpoints
89
-
90
- ### POST /api/save
91
-
92
- Save edited document (creates document on first save).
93
-
94
- **Request:**
95
- ```json
96
- {
97
- "pdfName": "document.pdf",
98
- "editedJson": { /* current state */ },
99
- "originalJson": { /* baseline (first save only) */ },
100
- "summary": "Optional description"
101
- }
102
- ```
103
-
104
- **Response:**
105
- ```json
106
- {
107
- "success": true,
108
- "documentId": 1,
109
- "editId": 1,
110
- "message": "Document saved successfully"
111
- }
112
- ```
113
-
114
- ### GET /api/load/:pdfName
115
-
116
- Load document with latest edit.
117
-
118
- **Response:**
119
- ```json
120
- {
121
- "success": true,
122
- "document": {
123
- "id": 1,
124
- "pdfName": "document.pdf",
125
- "originalJson": { /* baseline */ },
126
- "currentJson": { /* latest state */ },
127
- "latestEdit": {
128
- "id": 5,
129
- "edit_summary": "Edit via UI",
130
- "created_at": "2025-11-18 10:00:00"
131
- }
132
- }
133
- }
134
- ```
135
-
136
- ### GET /api/history/:pdfName
137
-
138
- Get full edit history.
139
-
140
- **Response:**
141
- ```json
142
- {
143
- "success": true,
144
- "document": { "id": 1, "pdfName": "document.pdf" },
145
- "history": [
146
- { "id": 5, "edited_json": {}, "created_at": "2025-11-18 10:00:00" },
147
- { "id": 4, "edited_json": {}, "created_at": "2025-11-18 09:55:00" }
148
- ]
149
- }
150
- ```
151
-
152
- ### GET /health
153
-
154
- Health check endpoint.
155
-
156
- **Response:**
157
- ```json
158
- {
159
- "status": "ok",
160
- "timestamp": "2025-11-18T10:00:00.000Z"
161
- }
162
- ```
163
-
164
- ## Database Schema
165
-
166
- ### documents table
167
- ```sql
168
- CREATE TABLE documents (
169
- id INTEGER PRIMARY KEY AUTOINCREMENT,
170
- pdf_name TEXT NOT NULL, -- Unique key
171
- original_json TEXT NOT NULL, -- Baseline
172
- created_at DATETIME DEFAULT CURRENT_TIMESTAMP
173
- );
174
- ```
175
-
176
- ### edits table
177
- ```sql
178
- CREATE TABLE edits (
179
- id INTEGER PRIMARY KEY AUTOINCREMENT,
180
- document_id INTEGER NOT NULL,
181
- edited_json TEXT NOT NULL, -- Full state
182
- edit_summary TEXT,
183
- created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
184
- FOREIGN KEY (document_id) REFERENCES documents(id)
185
- );
186
- ```
187
-
188
- ## Usage
189
-
190
- ### Start the server
191
-
192
- ```bash
193
- cd /Users/__chaks__/learn/saral-next/old-lib
194
- npm run server
195
- ```
196
-
197
- Server runs on: `http://localhost:3002`
198
-
199
- ### Use with any PDF/JSON
200
-
201
- ```
202
- http://localhost:5175/?pdf=/your-file.pdf&json=/your-data.json
203
- ```
204
-
205
- 1. Double-click text to edit
206
- 2. Click green **Save** button
207
- 3. Document automatically created in database
208
- 4. Subsequent edits tracked with full history
209
-
210
- ### Load previous session
211
-
212
- ```javascript
213
- // Fetch last saved state
214
- fetch('http://localhost:3002/api/load/your-file.pdf')
215
- .then(res => res.json())
216
- .then(data => {
217
- // Use data.document.currentJson to resume
218
- });
219
- ```
220
-
221
- ### View edit history
222
-
223
- ```javascript
224
- // Get all edits for a document
225
- fetch('http://localhost:3002/api/history/your-file.pdf')
226
- .then(res => res.json())
227
- .then(data => {
228
- // Browse through data.history
229
- });
230
- ```
231
-
232
- ## Save Behavior
233
-
234
- **Q: Is it save-as-you-type or does a Save button show up?**
235
-
236
- **A: Save button** ✅
237
-
238
- - Green **Save** button appears when you make edits
239
- - Click to save (not auto-save)
240
- - Full control over when to persist changes
241
- - Button shown next to Download button
242
-
243
- ## Production Features
244
-
245
- ### ✅ Implemented
246
-
247
- - [x] Automatic document creation
248
- - [x] Version history tracking
249
- - [x] Scalable for any PDF/JSON
250
- - [x] RESTful API design
251
- - [x] SQLite storage with indexes
252
- - [x] Error handling and logging
253
- - [x] CORS enabled
254
- - [x] 50MB JSON limit
255
- - [x] Smart baseline management
256
-
257
- ### 🔄 Recommended for Production
258
-
259
- - [ ] Add authentication (JWT/OAuth)
260
- - [ ] Add rate limiting
261
- - [ ] Migrate to PostgreSQL
262
- - [ ] Add monitoring/metrics
263
- - [ ] Add automated backups
264
- - [ ] Write comprehensive tests
265
- - [ ] Add input validation
266
- - [ ] Add user_id tracking
267
-
268
- ## Files Structure
269
-
270
- ```
271
- old-lib/
272
- ├── server/
273
- │ ├── db.js # SQLite database layer
274
- │ ├── server.js # Express API server
275
- │ ├── edits.db # SQLite database (auto-created)
276
- │ └── README.md # Server documentation
277
- ├── Structura.tsx # Main component (onSave prop)
278
- ├── HtmlViewer.tsx # HTML viewer (Save button)
279
- ├── test-app/
280
- │ └── src/
281
- │ └── App.tsx # Test app (onSave implementation)
282
- └── PRODUCTION_ARCHITECTURE.md # Detailed architecture docs
283
- ```
284
-
285
- ## Testing
286
-
287
- ### Test with any document
288
-
289
- ```bash
290
- # Open viewer with your PDF/JSON
291
- http://localhost:5175/?pdf=/your-doc.pdf&json=/your-data.json
292
-
293
- # Make edits, click Save
294
- # Document is automatically created
295
-
296
- # Check database
297
- sqlite3 server/edits.db "SELECT * FROM documents;"
298
- sqlite3 server/edits.db "SELECT * FROM edits;"
299
- ```
300
-
301
- ### Test API directly
302
-
303
- ```bash
304
- # Health check
305
- curl http://localhost:3002/health
306
-
307
- # Save document
308
- curl -X POST http://localhost:3002/api/save \
309
- -H "Content-Type: application/json" \
310
- -d '{
311
- "pdfName": "test.pdf",
312
- "editedJson": {"test": "data"},
313
- "originalJson": {"test": "data"},
314
- "summary": "First save"
315
- }'
316
-
317
- # Load document
318
- curl http://localhost:3002/api/load/test.pdf | python3 -m json.tool
319
-
320
- # Get history
321
- curl http://localhost:3002/api/history/test.pdf | python3 -m json.tool
322
- ```
323
-
324
- ## Database Location
325
-
326
- ```
327
- /Users/__chaks__/learn/saral-next/old-lib/server/edits.db
328
- ```
329
-
330
- Inspect with:
331
- ```bash
332
- cd /Users/__chaks__/learn/saral-next/old-lib
333
- sqlite3 server/edits.db
334
-
335
- .tables
336
- .schema documents
337
- .schema edits
338
- SELECT * FROM documents;
339
- SELECT * FROM edits ORDER BY created_at DESC;
340
- ```
341
-
342
- ## Scalability
343
-
344
- ### Handles Any Volume
345
-
346
- - ✅ Any number of PDFs
347
- - ✅ Any JSON size (up to 50MB)
348
- - ✅ Any number of edits per document
349
- - ✅ Automatic indexing for fast lookups
350
- - ✅ No pre-configuration needed
351
-
352
- ### Performance Characteristics
353
-
354
- - Document lookup: O(log n) with index
355
- - Latest edit: O(log n) with index
356
- - Full history: O(m) where m = edits per document
357
- - Storage: Linear with number of edits
358
-
359
- ### Recommended Limits
360
-
361
- - **Development**: SQLite handles 1000s of documents easily
362
- - **Production**: Migrate to PostgreSQL for 10,000+ documents
363
- - **JSON size**: Keep under 10MB for best performance
364
- - **Edits per document**: No practical limit
365
-
366
- ## Monitoring
367
-
368
- ### Server Logs
369
-
370
- ```
371
- [Server] Structura edit server running on http://localhost:3002
372
- [Server] Created new document for doc.pdf with ID 1
373
- [Server] Saved edit 1 for document 1
374
- [Server] Found existing document for doc.pdf with ID 1
375
- [Server] Saved edit 2 for document 1
376
- ```
377
-
378
- ### Monitor Database Growth
379
-
380
- ```bash
381
- # Database size
382
- ls -lh server/edits.db
383
-
384
- # Document count
385
- sqlite3 server/edits.db "SELECT COUNT(*) FROM documents;"
386
-
387
- # Edit count
388
- sqlite3 server/edits.db "SELECT COUNT(*) FROM edits;"
389
-
390
- # Edits per document
391
- sqlite3 server/edits.db "
392
- SELECT pdf_name, COUNT(e.id) as edit_count
393
- FROM documents d
394
- LEFT JOIN edits e ON d.id = e.document_id
395
- GROUP BY d.id
396
- ORDER BY edit_count DESC;
397
- "
398
- ```
399
-
400
- ## Deployment
401
-
402
- ### Development
403
-
404
- ```bash
405
- npm run server # Start server
406
- npm run server:dev # With auto-restart (nodemon)
407
- ```
408
-
409
- ### Production (PM2)
410
-
411
- ```bash
412
- npm install -g pm2
413
-
414
- # Start server
415
- pm2 start server/server.js --name structura-edit
416
-
417
- # Save process list
418
- pm2 save
419
-
420
- # Auto-start on reboot
421
- pm2 startup
422
- ```
423
-
424
- ### Docker
425
-
426
- ```dockerfile
427
- FROM node:18
428
- WORKDIR /app
429
- COPY package*.json ./
430
- RUN npm install --production
431
- COPY . .
432
- EXPOSE 3002
433
- CMD ["node", "server/server.js"]
434
- ```
435
-
436
- ## Status: ✅ PRODUCTION-READY
437
-
438
- The save functionality is complete and designed for production use:
439
-
440
- - Zero configuration required
441
- - Scales to any PDF/JSON combination
442
- - Automatic document creation
443
- - Full version history
444
- - RESTful API design
445
- - Comprehensive error handling
446
- - Production-ready architecture
447
-
448
- Start using it immediately with any PDF and JSON file!
@@ -1,34 +0,0 @@
1
- 'use strict';
2
-
3
- var jsxRuntime = require('react/jsx-runtime');
4
- var classVarianceAuthority = require('class-variance-authority');
5
- var utils = require('../lib/utils.js');
6
-
7
- const badgeVariants = classVarianceAuthority.cva("inline-flex items-center rounded-full border px-2.5 py-0.5 text-xs font-semibold transition-colors focus:outline-none focus:ring-2 focus:ring-ring focus:ring-offset-2", {
8
- variants: {
9
- variant: {
10
- default: "border-transparent bg-primary text-primary-foreground hover:bg-primary/80",
11
- secondary: "border-transparent bg-secondary text-secondary-foreground hover:bg-secondary/80",
12
- destructive: "border-transparent bg-destructive text-destructive-foreground hover:bg-destructive/80",
13
- outline: "text-foreground"
14
- }
15
- },
16
- defaultVariants: {
17
- variant: "default"
18
- }
19
- });
20
- function Badge({
21
- className,
22
- variant,
23
- ...props
24
- }) {
25
- return jsxRuntime.jsx("div", {
26
- className: utils.cn(badgeVariants({
27
- variant
28
- }), className),
29
- ...props
30
- });
31
- }
32
-
33
- exports.Badge = Badge;
34
- exports.badgeVariants = badgeVariants;
@@ -1,40 +0,0 @@
1
- /**
2
- * Convert Marker JSON block output to HTML by resolving content-ref tags.
3
- * This is the TypeScript equivalent of core/output.py's json_to_html function.
4
- *
5
- * Usage:
6
- * - For Gemini-corrected JSONs: The GeminiCorrected block has clean HTML, no refs to resolve
7
- * - For original JSONs: Resolves <content-ref src='...'> tags recursively
8
- */
9
- interface Block {
10
- id: string;
11
- html?: string;
12
- children?: Block[];
13
- block_type?: string;
14
- [key: string]: any;
15
- }
16
- /**
17
- * Resolve content-ref tags in HTML by replacing them with actual child content
18
- */
19
- export declare function jsonToHtml(block: Block): string;
20
- /**
21
- * Check if a block has Gemini corrections
22
- */
23
- export declare function hasGeminiCorrections(block: Block): boolean;
24
- /**
25
- * Get the corrected HTML from a Gemini-corrected block, or fall back to rendering original
26
- */
27
- export declare function getBlockHtml(block: Block): string;
28
- /**
29
- * Render an entire page to HTML
30
- */
31
- export declare function renderPageToHtml(page: Block): string;
32
- /**
33
- * Render the entire document to HTML (all pages)
34
- */
35
- export declare function renderDocumentToHtml(document: Block): string;
36
- /**
37
- * Extract plain text from HTML (strips all tags)
38
- */
39
- export declare function htmlToPlainText(html: string): string;
40
- export {};
@@ -1,31 +0,0 @@
1
- import { objectWithoutProperties as _objectWithoutProperties, objectSpread2 as _objectSpread2 } from '../_virtual/_rollupPluginBabelHelpers.js';
2
- import { jsx } from 'react/jsx-runtime';
3
- import { cva } from 'class-variance-authority';
4
- import { cn } from '../lib/utils.js';
5
-
6
- var _excluded = ["className", "variant"];
7
- var badgeVariants = cva("inline-flex items-center rounded-full border px-2.5 py-0.5 text-xs font-semibold transition-colors focus:outline-none focus:ring-2 focus:ring-ring focus:ring-offset-2", {
8
- variants: {
9
- variant: {
10
- default: "border-transparent bg-primary text-primary-foreground hover:bg-primary/80",
11
- secondary: "border-transparent bg-secondary text-secondary-foreground hover:bg-secondary/80",
12
- destructive: "border-transparent bg-destructive text-destructive-foreground hover:bg-destructive/80",
13
- outline: "text-foreground"
14
- }
15
- },
16
- defaultVariants: {
17
- variant: "default"
18
- }
19
- });
20
- function Badge(_ref) {
21
- var className = _ref.className,
22
- variant = _ref.variant,
23
- props = _objectWithoutProperties(_ref, _excluded);
24
- return jsx("div", _objectSpread2({
25
- className: cn(badgeVariants({
26
- variant: variant
27
- }), className)
28
- }, props));
29
- }
30
-
31
- export { Badge, badgeVariants };