@arela/uploader 1.0.2 → 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.local +316 -0
- package/.env.template +70 -0
- package/coverage/IdentifyCommand.js.html +1462 -0
- package/coverage/PropagateCommand.js.html +1507 -0
- package/coverage/PushCommand.js.html +1504 -0
- package/coverage/ScanCommand.js.html +1654 -0
- package/coverage/UploadCommand.js.html +1846 -0
- package/coverage/WatchCommand.js.html +4111 -0
- package/coverage/base.css +224 -0
- package/coverage/block-navigation.js +87 -0
- package/coverage/favicon.png +0 -0
- package/coverage/index.html +191 -0
- package/coverage/lcov-report/IdentifyCommand.js.html +1462 -0
- package/coverage/lcov-report/PropagateCommand.js.html +1507 -0
- package/coverage/lcov-report/PushCommand.js.html +1504 -0
- package/coverage/lcov-report/ScanCommand.js.html +1654 -0
- package/coverage/lcov-report/UploadCommand.js.html +1846 -0
- package/coverage/lcov-report/WatchCommand.js.html +4111 -0
- package/coverage/lcov-report/base.css +224 -0
- package/coverage/lcov-report/block-navigation.js +87 -0
- package/coverage/lcov-report/favicon.png +0 -0
- package/coverage/lcov-report/index.html +191 -0
- package/coverage/lcov-report/prettify.css +1 -0
- package/coverage/lcov-report/prettify.js +2 -0
- package/coverage/lcov-report/sort-arrow-sprite.png +0 -0
- package/coverage/lcov-report/sorter.js +210 -0
- package/coverage/lcov.info +1937 -0
- package/coverage/prettify.css +1 -0
- package/coverage/prettify.js +2 -0
- package/coverage/sort-arrow-sprite.png +0 -0
- package/coverage/sorter.js +210 -0
- package/docs/API_RETRY_MECHANISM.md +338 -0
- package/docs/ARELA_IDENTIFY_IMPLEMENTATION.md +489 -0
- package/docs/ARELA_IDENTIFY_QUICKREF.md +186 -0
- package/docs/ARELA_PROPAGATE_IMPLEMENTATION.md +581 -0
- package/docs/ARELA_PROPAGATE_QUICKREF.md +272 -0
- package/docs/ARELA_PUSH_IMPLEMENTATION.md +577 -0
- package/docs/ARELA_PUSH_QUICKREF.md +322 -0
- package/docs/ARELA_SCAN_IMPLEMENTATION.md +373 -0
- package/docs/ARELA_SCAN_QUICKREF.md +139 -0
- package/docs/CROSS_PLATFORM_PATH_HANDLING.md +593 -0
- package/docs/DETECTION_ATTEMPT_TRACKING.md +414 -0
- package/docs/MIGRATION_UPLOADER_TO_FILE_STATS.md +1020 -0
- package/docs/MULTI_LEVEL_DIRECTORY_SCANNING.md +494 -0
- package/docs/STATS_COMMAND_SEQUENCE_DIAGRAM.md +287 -0
- package/docs/STATS_COMMAND_SIMPLE.md +93 -0
- package/package.json +31 -3
- package/src/commands/IdentifyCommand.js +459 -0
- package/src/commands/PropagateCommand.js +474 -0
- package/src/commands/PushCommand.js +473 -0
- package/src/commands/ScanCommand.js +523 -0
- package/src/config/config.js +154 -7
- package/src/file-detection.js +9 -10
- package/src/index.js +150 -0
- package/src/services/ScanApiService.js +645 -0
- package/src/utils/PathNormalizer.js +220 -0
- package/tests/commands/IdentifyCommand.test.js +570 -0
- package/tests/commands/PropagateCommand.test.js +568 -0
- package/tests/commands/PushCommand.test.js +754 -0
- package/tests/commands/ScanCommand.test.js +382 -0
- package/tests/unit/PathAndTableNameGeneration.test.js +1211 -0
|
@@ -0,0 +1,287 @@
|
|
|
1
|
+
# Stats Command Sequence Diagram
|
|
2
|
+
|
|
3
|
+
## Command: `node src/index.js stats --stats-only`
|
|
4
|
+
|
|
5
|
+
This diagram shows the detailed flow of the stats command with the `--stats-only` flag, which collects file statistics and records them to the database without uploading files.
|
|
6
|
+
|
|
7
|
+
```mermaid
|
|
8
|
+
sequenceDiagram
|
|
9
|
+
participant User
|
|
10
|
+
participant CLI as ArelaUploaderCLI<br/>(index.js)
|
|
11
|
+
participant Commander as Commander.js
|
|
12
|
+
participant UploadCmd as UploadCommand<br/>(UploadCommand.js)
|
|
13
|
+
participant Config as appConfig<br/>(config.js)
|
|
14
|
+
participant FileOps as FileOperations<br/>(FileOperations.js)
|
|
15
|
+
participant Globby as globby
|
|
16
|
+
participant DBService as DatabaseService<br/>(DatabaseService.js)
|
|
17
|
+
participant Factory as UploadServiceFactory
|
|
18
|
+
participant APIService as ApiUploadService
|
|
19
|
+
participant ArelaAPI as Arela API<br/>/api/uploader/batch-upsert
|
|
20
|
+
participant Logger as LoggingService
|
|
21
|
+
participant ProgressBar as cli-progress
|
|
22
|
+
|
|
23
|
+
%% Initial Setup
|
|
24
|
+
User->>CLI: node src/index.js stats --stats-only
|
|
25
|
+
activate CLI
|
|
26
|
+
Note over CLI: Constructor initializes<br/>ErrorHandler, UploadCommand,<br/>WatchCommand
|
|
27
|
+
|
|
28
|
+
CLI->>CLI: #setupProgram()
|
|
29
|
+
CLI->>CLI: #setupCommands()
|
|
30
|
+
Note over CLI: Register 'stats' command<br/>with Commander
|
|
31
|
+
|
|
32
|
+
CLI->>CLI: #setupErrorHandling()
|
|
33
|
+
Note over CLI: Register SIGINT, SIGTERM,<br/>uncaughtException handlers
|
|
34
|
+
|
|
35
|
+
CLI->>CLI: run()
|
|
36
|
+
CLI->>Logger: setVerbose(false)
|
|
37
|
+
CLI->>Logger: info("Arela Uploader started")
|
|
38
|
+
|
|
39
|
+
%% Command Parsing
|
|
40
|
+
CLI->>Commander: parseAsync()
|
|
41
|
+
activate Commander
|
|
42
|
+
Commander->>Commander: Parse args: ['stats', '--stats-only']
|
|
43
|
+
Commander->>Commander: Match 'stats' command
|
|
44
|
+
|
|
45
|
+
%% Stats Command Action
|
|
46
|
+
Commander->>UploadCmd: execute(options)
|
|
47
|
+
activate UploadCmd
|
|
48
|
+
Note over UploadCmd: options = {<br/> statsOnly: true,<br/> batchSize: '10',<br/> api: 'default'<br/>}
|
|
49
|
+
|
|
50
|
+
%% API Configuration
|
|
51
|
+
alt api !== 'default'
|
|
52
|
+
UploadCmd->>Config: setApiTarget(options.api)
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
%% Check Watch Mode
|
|
56
|
+
UploadCmd->>UploadCmd: Check if watch mode active
|
|
57
|
+
Note over UploadCmd: Prevent upload during watch
|
|
58
|
+
|
|
59
|
+
%% Validation
|
|
60
|
+
UploadCmd->>UploadCmd: #validateOptions(options)
|
|
61
|
+
UploadCmd->>Config: validateConfiguration(false)
|
|
62
|
+
activate Config
|
|
63
|
+
Config-->>UploadCmd: Configuration valid
|
|
64
|
+
deactivate Config
|
|
65
|
+
|
|
66
|
+
%% Initialize Services
|
|
67
|
+
UploadCmd->>Factory: getUploadService(false)
|
|
68
|
+
activate Factory
|
|
69
|
+
Factory->>Config: isApiModeAvailable()
|
|
70
|
+
Config-->>Factory: true/false
|
|
71
|
+
Factory->>APIService: new ApiUploadService()
|
|
72
|
+
activate APIService
|
|
73
|
+
APIService->>Config: getApiConfig()
|
|
74
|
+
Config-->>APIService: { baseUrl, token }
|
|
75
|
+
APIService-->>Factory: apiService instance
|
|
76
|
+
deactivate APIService
|
|
77
|
+
Factory-->>UploadCmd: uploadService
|
|
78
|
+
deactivate Factory
|
|
79
|
+
|
|
80
|
+
UploadCmd->>Config: getUploadSources()
|
|
81
|
+
Config-->>UploadCmd: sources array (e.g., ['sample/2023'])
|
|
82
|
+
|
|
83
|
+
UploadCmd->>Config: getBasePath()
|
|
84
|
+
Config-->>UploadCmd: basePath
|
|
85
|
+
|
|
86
|
+
UploadCmd->>Logger: info("Starting upload with Arela API")
|
|
87
|
+
|
|
88
|
+
%% Process Each Source
|
|
89
|
+
loop For each source in sources
|
|
90
|
+
Note over UploadCmd: Sequential source processing
|
|
91
|
+
UploadCmd->>UploadCmd: Resolve sourcePath
|
|
92
|
+
Note over UploadCmd: sourcePath = path.resolve(basePath, source)
|
|
93
|
+
|
|
94
|
+
UploadCmd->>Logger: info("Processing folder: " + sourcePath)
|
|
95
|
+
|
|
96
|
+
%% File Discovery
|
|
97
|
+
UploadCmd->>UploadCmd: #discoverFiles(sourcePath)
|
|
98
|
+
UploadCmd->>FileOps: fileExists(sourcePath)
|
|
99
|
+
FileOps-->>UploadCmd: true
|
|
100
|
+
|
|
101
|
+
UploadCmd->>FileOps: getFileStats(sourcePath)
|
|
102
|
+
FileOps-->>UploadCmd: stats (isDirectory: true)
|
|
103
|
+
|
|
104
|
+
UploadCmd->>Globby: globby([sourcePath/**/*], {onlyFiles: true})
|
|
105
|
+
activate Globby
|
|
106
|
+
Globby-->>UploadCmd: files array
|
|
107
|
+
deactivate Globby
|
|
108
|
+
|
|
109
|
+
UploadCmd->>Logger: info("Found X files to process")
|
|
110
|
+
|
|
111
|
+
%% Process Files in Batches
|
|
112
|
+
UploadCmd->>UploadCmd: #processFilesInBatches(files, options, ...)
|
|
113
|
+
Note over UploadCmd: batchSize = 10 (from options)
|
|
114
|
+
|
|
115
|
+
UploadCmd->>ProgressBar: new SingleBar()
|
|
116
|
+
activate ProgressBar
|
|
117
|
+
UploadCmd->>ProgressBar: start(files.length, 0)
|
|
118
|
+
|
|
119
|
+
loop For each batch of files
|
|
120
|
+
Note over UploadCmd: batch = files.slice(i, i + batchSize)
|
|
121
|
+
|
|
122
|
+
UploadCmd->>UploadCmd: #processBatch(batch, options, ...)
|
|
123
|
+
|
|
124
|
+
%% Stats Only Mode
|
|
125
|
+
Note over UploadCmd: options.statsOnly === true
|
|
126
|
+
|
|
127
|
+
%% Create File Objects
|
|
128
|
+
loop For each file in batch
|
|
129
|
+
UploadCmd->>FileOps: getFileStats(filePath)
|
|
130
|
+
FileOps-->>UploadCmd: stats
|
|
131
|
+
Note over UploadCmd: Create fileObject:<br/>{path, originalName, stats}
|
|
132
|
+
end
|
|
133
|
+
|
|
134
|
+
%% Insert Stats to Database
|
|
135
|
+
UploadCmd->>DBService: insertStatsOnlyToUploaderTable(fileObjects, options)
|
|
136
|
+
activate DBService
|
|
137
|
+
|
|
138
|
+
DBService->>Logger: info("Collecting filesystem stats...")
|
|
139
|
+
|
|
140
|
+
%% Filter System Files
|
|
141
|
+
loop For each file
|
|
142
|
+
Note over DBService: Skip system files:<br/>.DS_Store, __pycache__, etc.
|
|
143
|
+
|
|
144
|
+
DBService->>DBService: Create record object
|
|
145
|
+
Note over DBService: record = {<br/> name, documentType: null,<br/> size, filename,<br/> originalPath, arelaPath: null,<br/> status: 'fs-stats',<br/> rfc: null, fileExtension,<br/> modifiedAt, year: null,<br/> processingStatus: 'PENDING',<br/> uploadAttempts: 0<br/>}
|
|
146
|
+
end
|
|
147
|
+
|
|
148
|
+
DBService->>Logger: info("Processing X file stats in batches...")
|
|
149
|
+
|
|
150
|
+
%% Get Upload Service
|
|
151
|
+
alt options.apiTarget specified
|
|
152
|
+
DBService->>Factory: getApiServiceForTarget(apiTarget)
|
|
153
|
+
else default
|
|
154
|
+
DBService->>Factory: getUploadService(false)
|
|
155
|
+
end
|
|
156
|
+
Factory-->>DBService: uploadService (ApiUploadService)
|
|
157
|
+
|
|
158
|
+
%% Batch Processing (1000 records per API call)
|
|
159
|
+
loop For each batch of 1000 records
|
|
160
|
+
DBService->>APIService: batchUpsertStats(batch)
|
|
161
|
+
activate APIService
|
|
162
|
+
|
|
163
|
+
APIService->>APIService: Prepare request
|
|
164
|
+
Note over APIService: URL: baseUrl + '/api/uploader/batch-upsert'<br/>Headers: x-api-key, Content-Type<br/>Body: JSON.stringify(records)
|
|
165
|
+
|
|
166
|
+
APIService->>ArelaAPI: POST /api/uploader/batch-upsert
|
|
167
|
+
activate ArelaAPI
|
|
168
|
+
Note over ArelaAPI: Process batch upsert<br/>(insert or update by original_path)
|
|
169
|
+
ArelaAPI-->>APIService: { inserted, updated, total }
|
|
170
|
+
deactivate ArelaAPI
|
|
171
|
+
|
|
172
|
+
APIService-->>DBService: result
|
|
173
|
+
deactivate APIService
|
|
174
|
+
|
|
175
|
+
alt Batch % 10 === 0 (every 10th batch)
|
|
176
|
+
DBService->>Logger: info("Batch N: X new, Y updates")
|
|
177
|
+
end
|
|
178
|
+
end
|
|
179
|
+
|
|
180
|
+
DBService->>Logger: success("Phase 1 Summary: X new records, Y updates")
|
|
181
|
+
DBService-->>UploadCmd: { totalInserted, totalSkipped, totalProcessed }
|
|
182
|
+
deactivate DBService
|
|
183
|
+
|
|
184
|
+
%% Update Results
|
|
185
|
+
Note over UploadCmd: batchResults.successCount = totalInserted<br/>batchResults.skippedCount = totalSkipped
|
|
186
|
+
|
|
187
|
+
UploadCmd->>UploadCmd: #updateResults(results, batchResults)
|
|
188
|
+
|
|
189
|
+
%% Update Progress Bar
|
|
190
|
+
UploadCmd->>ProgressBar: update(processedCount, {success, errors})
|
|
191
|
+
|
|
192
|
+
%% Batch Delay
|
|
193
|
+
alt appConfig.performance.batchDelay > 0
|
|
194
|
+
UploadCmd->>UploadCmd: setTimeout(batchDelay)
|
|
195
|
+
Note over UploadCmd: Wait between batches
|
|
196
|
+
end
|
|
197
|
+
end
|
|
198
|
+
|
|
199
|
+
UploadCmd->>ProgressBar: stop()
|
|
200
|
+
deactivate ProgressBar
|
|
201
|
+
|
|
202
|
+
%% Source Summary
|
|
203
|
+
UploadCmd->>UploadCmd: #logSourceSummary(source, result, options)
|
|
204
|
+
UploadCmd->>User: Console: Summary for source<br/>📊 Stats recorded: X<br/>⏭️ Duplicates: Y<br/>❌ Errors: Z
|
|
205
|
+
|
|
206
|
+
UploadCmd->>UploadCmd: #updateGlobalResults(globalResults, result)
|
|
207
|
+
end
|
|
208
|
+
|
|
209
|
+
%% Final Summary
|
|
210
|
+
UploadCmd->>UploadCmd: #logFinalSummary(globalResults, options, uploadService)
|
|
211
|
+
UploadCmd->>User: Console: STATS COLLECTION COMPLETED<br/>📊 Total stats recorded: X<br/>⏭️ Total duplicates: Y<br/>❌ Total errors: Z<br/>📜 Log file: path
|
|
212
|
+
|
|
213
|
+
%% Additional Phases (if --run-all-phases)
|
|
214
|
+
alt options.runAllPhases && options.statsOnly
|
|
215
|
+
UploadCmd->>UploadCmd: #runAdditionalPhases(options)
|
|
216
|
+
Note over UploadCmd: This would run Phase 2 (PDF Detection)<br/>Not executed with --stats-only alone
|
|
217
|
+
end
|
|
218
|
+
|
|
219
|
+
UploadCmd-->>Commander: Complete
|
|
220
|
+
deactivate UploadCmd
|
|
221
|
+
Commander-->>CLI: Command executed
|
|
222
|
+
deactivate Commander
|
|
223
|
+
|
|
224
|
+
CLI->>Logger: flush()
|
|
225
|
+
CLI-->>User: Exit code 0
|
|
226
|
+
deactivate CLI
|
|
227
|
+
```
|
|
228
|
+
|
|
229
|
+
## Key Flow Points
|
|
230
|
+
|
|
231
|
+
### 1. **Initialization Phase**
|
|
232
|
+
- CLI instantiates main components (ErrorHandler, UploadCommand, WatchCommand)
|
|
233
|
+
- Sets up Commander.js with all available commands
|
|
234
|
+
- Registers error handlers for graceful shutdown
|
|
235
|
+
|
|
236
|
+
### 2. **Command Parsing Phase**
|
|
237
|
+
- Commander.js parses the arguments and identifies the `stats` command
|
|
238
|
+
- Options are merged: `{ ...options, statsOnly: true }`
|
|
239
|
+
- API target is configured if specified
|
|
240
|
+
|
|
241
|
+
### 3. **Validation Phase**
|
|
242
|
+
- Validates configuration (API URL, token)
|
|
243
|
+
- Validates batch size (1-100)
|
|
244
|
+
- Checks that watch mode isn't active
|
|
245
|
+
|
|
246
|
+
### 4. **Service Initialization Phase**
|
|
247
|
+
- UploadServiceFactory determines the available service (API vs. Supabase)
|
|
248
|
+
- Creates ApiUploadService with HTTP agent configuration
|
|
249
|
+
- Retrieves upload sources and base path from config
|
|
250
|
+
|
|
251
|
+
### 5. **File Discovery Phase**
|
|
252
|
+
- For each source directory
|
|
253
|
+
- Uses globby to recursively find all files
|
|
254
|
+
- Filters to only files (not directories)
|
|
255
|
+
|
|
256
|
+
### 6. **Batch Processing Phase**
|
|
257
|
+
- Files are processed in configurable batches (default: 10)
|
|
258
|
+
- For each batch:
|
|
259
|
+
- Reads file stats (size, mtime)
|
|
260
|
+
- Filters out system files (.DS_Store, __pycache__, etc.)
|
|
261
|
+
- Creates record objects with status='fs-stats'
|
|
262
|
+
- Groups records into API batches (1000 per call)
|
|
263
|
+
|
|
264
|
+
### 7. **Database Recording Phase**
|
|
265
|
+
- ApiUploadService makes POST request to `/api/uploader/batch-upsert`
|
|
266
|
+
- API performs upsert (insert or update by original_path)
|
|
267
|
+
- Returns counts: inserted, updated, total
|
|
268
|
+
- Progress bar updates after each batch
|
|
269
|
+
|
|
270
|
+
### 8. **Summary Phase**
|
|
271
|
+
- Logs per-source summary (stats recorded, duplicates, errors)
|
|
272
|
+
- Logs final summary across all sources
|
|
273
|
+
- Displays log file path
|
|
274
|
+
|
|
275
|
+
### 9. **Cleanup Phase**
|
|
276
|
+
- Logger flushes buffered logs
|
|
277
|
+
- Process exits gracefully with code 0
|
|
278
|
+
|
|
279
|
+
## Important Notes
|
|
280
|
+
|
|
281
|
+
- **Stats-only mode**: Does NOT detect document types, does NOT upload files
|
|
282
|
+
- **Record status**: All records get `status='fs-stats'` and `processingStatus='PENDING'`
|
|
283
|
+
- **Upsert behavior**: Records are inserted or updated by `original_path` (prevents duplicates)
|
|
284
|
+
- **Batch sizes**: Files are processed in batches of 10 (configurable); API calls are batched at 1000 records per call
|
|
285
|
+
- **System files**: Automatically filtered (.DS_Store, __pycache__, .swp, etc.)
|
|
286
|
+
- **Progress tracking**: Real-time progress bar with success/error counts
|
|
287
|
+
- **HTTP agent**: Connection pooling optimized for multiple API replicas (10 concurrent connections)
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
# Stats Command - Simplified Flow
|
|
2
|
+
|
|
3
|
+
## Command: `node src/index.js stats --stats-only`
|
|
4
|
+
|
|
5
|
+
A high-level overview of the stats collection process.
|
|
6
|
+
|
|
7
|
+
```mermaid
|
|
8
|
+
sequenceDiagram
|
|
9
|
+
participant User
|
|
10
|
+
participant CLI as CLI Application
|
|
11
|
+
participant UploadCmd as Upload Command
|
|
12
|
+
participant Config as Configuration
|
|
13
|
+
participant FileSystem as File System
|
|
14
|
+
participant Database as Database Service
|
|
15
|
+
participant API as Arela API
|
|
16
|
+
|
|
17
|
+
User->>CLI: stats --stats-only
|
|
18
|
+
activate CLI
|
|
19
|
+
|
|
20
|
+
CLI->>UploadCmd: Execute stats command
|
|
21
|
+
activate UploadCmd
|
|
22
|
+
|
|
23
|
+
UploadCmd->>Config: Get sources & validate config
|
|
24
|
+
Config-->>UploadCmd: Sources array & settings
|
|
25
|
+
|
|
26
|
+
loop For each source directory
|
|
27
|
+
UploadCmd->>FileSystem: Discover all files
|
|
28
|
+
FileSystem-->>UploadCmd: List of file paths
|
|
29
|
+
|
|
30
|
+
loop Process in batches
|
|
31
|
+
UploadCmd->>FileSystem: Read file stats (size, mtime)
|
|
32
|
+
FileSystem-->>UploadCmd: File metadata
|
|
33
|
+
|
|
34
|
+
UploadCmd->>Database: Insert stats records
|
|
35
|
+
activate Database
|
|
36
|
+
|
|
37
|
+
Database->>Database: Filter system files<br/>Create record objects<br/>(status='fs-stats')
|
|
38
|
+
|
|
39
|
+
Database->>API: POST /api/uploader/batch-upsert
|
|
40
|
+
activate API
|
|
41
|
+
API-->>Database: {inserted, updated}
|
|
42
|
+
deactivate API
|
|
43
|
+
|
|
44
|
+
Database-->>UploadCmd: Success count
|
|
45
|
+
deactivate Database
|
|
46
|
+
|
|
47
|
+
UploadCmd->>User: Update progress bar
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
UploadCmd->>User: Source summary
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
UploadCmd->>User: Final summary<br/>📊 Total stats recorded<br/>⏭️ Duplicates<br/>❌ Errors
|
|
54
|
+
|
|
55
|
+
deactivate UploadCmd
|
|
56
|
+
deactivate CLI
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
## Process Overview
|
|
60
|
+
|
|
61
|
+
### 1. **Command Initialization**
|
|
62
|
+
- CLI parses `stats --stats-only` command
|
|
63
|
+
- Validates configuration (API URL, token)
|
|
64
|
+
- Initializes Upload Command
|
|
65
|
+
|
|
66
|
+
### 2. **File Discovery**
|
|
67
|
+
- Retrieves source directories from configuration
|
|
68
|
+
- Recursively scans for all files in each source
|
|
69
|
+
- Filters out directories, keeps only files
|
|
70
|
+
|
|
71
|
+
### 3. **Stats Collection**
|
|
72
|
+
- Processes files in batches (default: 10 files)
|
|
73
|
+
- Reads filesystem metadata (size, modified time, extension)
|
|
74
|
+
- Filters out system files (.DS_Store, __pycache__, etc.)
|
|
75
|
+
|
|
76
|
+
### 4. **Database Recording**
|
|
77
|
+
- Creates record objects with `status='fs-stats'`
|
|
78
|
+
- Sends batches of 1000 records to API
|
|
79
|
+
- API performs upsert by `original_path`
|
|
80
|
+
- Returns inserted/updated counts
|
|
81
|
+
|
|
82
|
+
### 5. **Progress & Summary**
|
|
83
|
+
- Real-time progress bar during processing
|
|
84
|
+
- Per-source summary after each directory
|
|
85
|
+
- Final summary with total counts
|
|
86
|
+
|
|
87
|
+
## Key Characteristics
|
|
88
|
+
|
|
89
|
+
- **No File Upload**: Files are not uploaded, only metadata is recorded
|
|
90
|
+
- **No Detection**: Document types are not detected (all `null`)
|
|
91
|
+
- **Duplicate Prevention**: Upsert by `original_path` prevents duplicates
|
|
92
|
+
- **Batch Processing**: Efficient handling of large file sets
|
|
93
|
+
- **Status**: All records are marked with `status='fs-stats'` and `processingStatus='PENDING'`
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@arela/uploader",
|
|
3
|
-
"version": "1.0.
|
|
3
|
+
"version": "1.0.4",
|
|
4
4
|
"description": "CLI to upload files/directories to Arela",
|
|
5
5
|
"bin": {
|
|
6
6
|
"arela": "./src/index.js"
|
|
@@ -8,7 +8,9 @@
|
|
|
8
8
|
"type": "module",
|
|
9
9
|
"scripts": {
|
|
10
10
|
"start": "node ./src/index.js",
|
|
11
|
-
"test": "
|
|
11
|
+
"test": "NODE_OPTIONS=--experimental-vm-modules jest",
|
|
12
|
+
"test:watch": "NODE_OPTIONS=--experimental-vm-modules jest --watch",
|
|
13
|
+
"test:coverage": "NODE_OPTIONS=--experimental-vm-modules jest --coverage",
|
|
12
14
|
"format": "prettier --write \"src/**/*.js\""
|
|
13
15
|
},
|
|
14
16
|
"repository": {
|
|
@@ -38,10 +40,36 @@
|
|
|
38
40
|
"globby": "14.1.0",
|
|
39
41
|
"mime-types": "3.0.1",
|
|
40
42
|
"node-fetch": "3.3.2",
|
|
41
|
-
"office-text-extractor": "3.0.3"
|
|
43
|
+
"office-text-extractor": "3.0.3",
|
|
44
|
+
"p-limit": "^7.2.0",
|
|
45
|
+
"pdf-parse": "^2.4.5"
|
|
42
46
|
},
|
|
43
47
|
"devDependencies": {
|
|
48
|
+
"@jest/globals": "^30.2.0",
|
|
44
49
|
"@trivago/prettier-plugin-sort-imports": "5.2.2",
|
|
50
|
+
"jest": "^30.2.0",
|
|
45
51
|
"prettier": "3.5.3"
|
|
52
|
+
},
|
|
53
|
+
"jest": {
|
|
54
|
+
"testEnvironment": "node",
|
|
55
|
+
"transform": {},
|
|
56
|
+
"testMatch": [
|
|
57
|
+
"**/tests/**/*.test.js"
|
|
58
|
+
],
|
|
59
|
+
"testPathIgnorePatterns": [
|
|
60
|
+
"/node_modules/",
|
|
61
|
+
"/scripts/"
|
|
62
|
+
],
|
|
63
|
+
"collectCoverageFrom": [
|
|
64
|
+
"src/commands/**/*.js",
|
|
65
|
+
"!src/commands/**/index.js"
|
|
66
|
+
],
|
|
67
|
+
"coverageDirectory": "coverage",
|
|
68
|
+
"coverageReporters": [
|
|
69
|
+
"text",
|
|
70
|
+
"text-summary",
|
|
71
|
+
"html",
|
|
72
|
+
"lcov"
|
|
73
|
+
]
|
|
46
74
|
}
|
|
47
75
|
}
|