@arela/uploader 1.0.24 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/docs/AUTO_PROCESSING_PIPELINE.md +258 -0
- package/docs/COMPLETE_USAGE_GUIDE.md +1363 -0
- package/docs/DATABASESERVICE_IMPROVEMENTS.md +546 -0
- package/docs/PASO_2_TEST_RESULTS.md +298 -0
- package/docs/PASO_3_PLAN.md +385 -0
- package/docs/PHASE_1_FILE_DETECTION.md +366 -0
- package/docs/PHASE_2_API_INTEGRATION.md +426 -0
- package/docs/PHASE_3_DATABASE_MANAGEMENT.md +480 -0
- package/docs/PHASE_4_FILE_OPERATIONS.md +448 -0
- package/docs/PHASE_5_WATCH_MODE.md +450 -0
- package/docs/PHASE_6_SIGNAL_HANDLING.md +472 -0
- package/docs/PHASE_7_ADVANCED_FEATURES.md +560 -0
- package/docs/PLAN_WATCH_FEATURE.md +417 -0
- package/docs/README.md +480 -0
- package/docs/SCHEMA_ALIGNMENT_SUMMARY.md +301 -0
- package/docs/SMARTWATCH_DATABASE_REFACTORING.md +181 -0
- package/docs/SMART_WATCH_DATABASE_CHANGES.md +502 -0
- package/docs/TESTING_WATCH_MODE.md +212 -0
- package/docs/WATCHER_API_IMPLEMENTATION.md +520 -0
- package/docs/WATCHER_API_INTEGRATION.md +562 -0
- package/docs/WATCHER_SETUP_GUIDE.md +614 -0
- package/docs/WATCH_ARCHITECTURE.md +395 -0
- package/docs/WATCH_AUTO_PIPELINE.md +334 -0
- package/docs/WATCH_CONFIGURATION.md +267 -0
- package/docs/WATCH_USAGE_GUIDE.md +567 -0
- package/docs/commands.md +14 -0
- package/package.json +1 -1
- package/src/commands/IdentifyCommand.js +11 -0
- package/src/config/config.js +2 -2
- package/src/file-detection.js +42 -1
- package/src/scoring/scoring-engine.js +40 -7
- package/src/services/LoggingService.js +5 -3
- package/.vscode/settings.json +0 -1
- package/coverage/IdentifyCommand.js.html +0 -1462
- package/coverage/PropagateCommand.js.html +0 -1507
- package/coverage/PushCommand.js.html +0 -1504
- package/coverage/ScanCommand.js.html +0 -1654
- package/coverage/UploadCommand.js.html +0 -1846
- package/coverage/WatchCommand.js.html +0 -4111
- package/coverage/base.css +0 -224
- package/coverage/block-navigation.js +0 -87
- package/coverage/favicon.png +0 -0
- package/coverage/index.html +0 -191
- package/coverage/lcov-report/IdentifyCommand.js.html +0 -1462
- package/coverage/lcov-report/PropagateCommand.js.html +0 -1507
- package/coverage/lcov-report/PushCommand.js.html +0 -1504
- package/coverage/lcov-report/ScanCommand.js.html +0 -1654
- package/coverage/lcov-report/UploadCommand.js.html +0 -1846
- package/coverage/lcov-report/WatchCommand.js.html +0 -4111
- package/coverage/lcov-report/base.css +0 -224
- package/coverage/lcov-report/block-navigation.js +0 -87
- package/coverage/lcov-report/favicon.png +0 -0
- package/coverage/lcov-report/index.html +0 -191
- package/coverage/lcov-report/prettify.css +0 -1
- package/coverage/lcov-report/prettify.js +0 -2
- package/coverage/lcov-report/sort-arrow-sprite.png +0 -0
- package/coverage/lcov-report/sorter.js +0 -210
- package/coverage/lcov.info +0 -1937
- package/coverage/prettify.css +0 -1
- package/coverage/prettify.js +0 -2
- package/coverage/sort-arrow-sprite.png +0 -0
- package/coverage/sorter.js +0 -210
- package/docs/API_ENDPOINTS_FOR_DETECTION.md +0 -647
- package/docs/API_RETRY_MECHANISM.md +0 -338
- package/docs/ARELA_IDENTIFY_IMPLEMENTATION.md +0 -489
- package/docs/ARELA_IDENTIFY_QUICKREF.md +0 -186
- package/docs/ARELA_PROPAGATE_IMPLEMENTATION.md +0 -581
- package/docs/ARELA_PROPAGATE_QUICKREF.md +0 -272
- package/docs/ARELA_PUSH_IMPLEMENTATION.md +0 -577
- package/docs/ARELA_PUSH_QUICKREF.md +0 -322
- package/docs/ARELA_SCAN_IMPLEMENTATION.md +0 -373
- package/docs/ARELA_SCAN_QUICKREF.md +0 -139
- package/docs/CROSS_PLATFORM_PATH_HANDLING.md +0 -597
- package/docs/DETECTION_ATTEMPT_TRACKING.md +0 -414
- package/docs/MIGRATION_UPLOADER_TO_FILE_STATS.md +0 -1020
- package/docs/MULTI_LEVEL_DIRECTORY_SCANNING.md +0 -494
- package/docs/QUICK_REFERENCE_API_DETECTION.md +0 -264
- package/docs/REFACTORING_SUMMARY_DETECT_PEDIMENTOS.md +0 -200
- package/docs/STATS_COMMAND_SEQUENCE_DIAGRAM.md +0 -287
- package/docs/STATS_COMMAND_SIMPLE.md +0 -93
|
@@ -36,17 +36,49 @@ import path from 'path';
|
|
|
36
36
|
|
|
37
37
|
import { FieldResult } from '../document-type-shared.js';
|
|
38
38
|
|
|
39
|
+
// Compile cache for string patterns (from DB matchers): stable across every
|
|
40
|
+
// document in a run, so compile once instead of per document. Invalid patterns
|
|
41
|
+
// cache as null and are treated as a non-match (parity with the TS engine).
|
|
42
|
+
const regexCache = new Map();
|
|
43
|
+
const REGEX_CACHE_MAX = 5000;
|
|
44
|
+
|
|
39
45
|
function toRegExp(clue) {
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
46
|
+
if (clue.pattern instanceof RegExp) {
|
|
47
|
+
// Local-seed patterns are already compiled; reset lastIndex so a g/y flag
|
|
48
|
+
// can't make repeated .test() calls stateful across documents.
|
|
49
|
+
clue.pattern.lastIndex = 0;
|
|
50
|
+
return clue.pattern;
|
|
51
|
+
}
|
|
52
|
+
const safeFlags = (clue.flags ?? '').replace(/[gy]/g, '');
|
|
53
|
+
const key = `${safeFlags} ${clue.pattern}`;
|
|
54
|
+
let re = regexCache.get(key);
|
|
55
|
+
if (re === undefined) {
|
|
56
|
+
try {
|
|
57
|
+
re = new RegExp(clue.pattern, safeFlags);
|
|
58
|
+
} catch {
|
|
59
|
+
re = null;
|
|
60
|
+
}
|
|
61
|
+
if (regexCache.size >= REGEX_CACHE_MAX) regexCache.clear();
|
|
62
|
+
regexCache.set(key, re);
|
|
63
|
+
}
|
|
64
|
+
return re;
|
|
43
65
|
}
|
|
44
66
|
|
|
67
|
+
// Bound the text a single regex runs on so an allowed (quadratic) pattern
|
|
68
|
+
// can't blow up on a megabyte-scale adversarial string. Real documents can
|
|
69
|
+
// legitimately exceed the cap (edocument XMLs embed multi-MB base64 PDFs with
|
|
70
|
+
// signature tags AFTER the blob), so instead of truncating we sample a HEAD +
|
|
71
|
+
// TAIL window — signatures live at the extremes, the blob in the middle.
|
|
72
|
+
// (Parity with the API TS engine.)
|
|
73
|
+
const MATCH_HEAD_CAP = 1_000_000;
|
|
74
|
+
const MATCH_TAIL_CAP = 262_144;
|
|
75
|
+
|
|
45
76
|
function clueTarget(clue, ctx) {
|
|
46
77
|
// FILENAME_REGEX tests the file name; every other kind tests the content.
|
|
47
|
-
|
|
48
|
-
? (ctx.fileName ?? '')
|
|
49
|
-
|
|
78
|
+
const raw =
|
|
79
|
+
clue.kind === 'FILENAME_REGEX' ? (ctx.fileName ?? '') : (ctx.source ?? '');
|
|
80
|
+
if (raw.length <= MATCH_HEAD_CAP + MATCH_TAIL_CAP) return raw;
|
|
81
|
+
return `${raw.slice(0, MATCH_HEAD_CAP)}\n${raw.slice(-MATCH_TAIL_CAP)}`;
|
|
50
82
|
}
|
|
51
83
|
|
|
52
84
|
/**
|
|
@@ -73,7 +105,8 @@ export function scoreMatcher(matcher, ctx) {
|
|
|
73
105
|
|
|
74
106
|
for (const clue of matcher.clues ?? []) {
|
|
75
107
|
const weight = clue.weight ?? 1;
|
|
76
|
-
const
|
|
108
|
+
const re = toRegExp(clue);
|
|
109
|
+
const hit = re ? re.test(clueTarget(clue, ctx)) : false;
|
|
77
110
|
|
|
78
111
|
if (clue.negative) {
|
|
79
112
|
if (hit) {
|
|
@@ -81,6 +81,10 @@ export class LoggingService {
|
|
|
81
81
|
*/
|
|
82
82
|
info(message) {
|
|
83
83
|
this.writeLog(message, 'info');
|
|
84
|
+
// Echo to console: run banners, stats and the 🧩 observability lines were
|
|
85
|
+
// file-only, so operators watching the terminal never saw them (the v1
|
|
86
|
+
// canary summary was invisible until someone grepped arela-upload.log).
|
|
87
|
+
console.log(message);
|
|
84
88
|
}
|
|
85
89
|
|
|
86
90
|
/**
|
|
@@ -89,9 +93,7 @@ export class LoggingService {
|
|
|
89
93
|
*/
|
|
90
94
|
warn(message) {
|
|
91
95
|
this.writeLog(message, 'warn');
|
|
92
|
-
|
|
93
|
-
console.warn(`⚠️ ${message}`);
|
|
94
|
-
}
|
|
96
|
+
console.warn(`⚠️ ${message}`);
|
|
95
97
|
}
|
|
96
98
|
|
|
97
99
|
/**
|
package/.vscode/settings.json
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{}
|