@arela/uploader 1.0.24 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/docs/AUTO_PROCESSING_PIPELINE.md +258 -0
- package/docs/COMPLETE_USAGE_GUIDE.md +1363 -0
- package/docs/DATABASESERVICE_IMPROVEMENTS.md +546 -0
- package/docs/PASO_2_TEST_RESULTS.md +298 -0
- package/docs/PASO_3_PLAN.md +385 -0
- package/docs/PHASE_1_FILE_DETECTION.md +366 -0
- package/docs/PHASE_2_API_INTEGRATION.md +426 -0
- package/docs/PHASE_3_DATABASE_MANAGEMENT.md +480 -0
- package/docs/PHASE_4_FILE_OPERATIONS.md +448 -0
- package/docs/PHASE_5_WATCH_MODE.md +450 -0
- package/docs/PHASE_6_SIGNAL_HANDLING.md +472 -0
- package/docs/PHASE_7_ADVANCED_FEATURES.md +560 -0
- package/docs/PLAN_WATCH_FEATURE.md +417 -0
- package/docs/README.md +480 -0
- package/docs/SCHEMA_ALIGNMENT_SUMMARY.md +301 -0
- package/docs/SMARTWATCH_DATABASE_REFACTORING.md +181 -0
- package/docs/SMART_WATCH_DATABASE_CHANGES.md +502 -0
- package/docs/TESTING_WATCH_MODE.md +212 -0
- package/docs/WATCHER_API_IMPLEMENTATION.md +520 -0
- package/docs/WATCHER_API_INTEGRATION.md +562 -0
- package/docs/WATCHER_SETUP_GUIDE.md +614 -0
- package/docs/WATCH_ARCHITECTURE.md +395 -0
- package/docs/WATCH_AUTO_PIPELINE.md +334 -0
- package/docs/WATCH_CONFIGURATION.md +267 -0
- package/docs/WATCH_USAGE_GUIDE.md +567 -0
- package/docs/commands.md +14 -0
- package/package.json +1 -1
- package/src/commands/IdentifyCommand.js +8 -0
- package/src/config/config.js +2 -2
- package/src/file-detection.js +42 -1
- package/src/scoring/scoring-engine.js +35 -7
- package/.vscode/settings.json +0 -1
- package/coverage/IdentifyCommand.js.html +0 -1462
- package/coverage/PropagateCommand.js.html +0 -1507
- package/coverage/PushCommand.js.html +0 -1504
- package/coverage/ScanCommand.js.html +0 -1654
- package/coverage/UploadCommand.js.html +0 -1846
- package/coverage/WatchCommand.js.html +0 -4111
- package/coverage/base.css +0 -224
- package/coverage/block-navigation.js +0 -87
- package/coverage/favicon.png +0 -0
- package/coverage/index.html +0 -191
- package/coverage/lcov-report/IdentifyCommand.js.html +0 -1462
- package/coverage/lcov-report/PropagateCommand.js.html +0 -1507
- package/coverage/lcov-report/PushCommand.js.html +0 -1504
- package/coverage/lcov-report/ScanCommand.js.html +0 -1654
- package/coverage/lcov-report/UploadCommand.js.html +0 -1846
- package/coverage/lcov-report/WatchCommand.js.html +0 -4111
- package/coverage/lcov-report/base.css +0 -224
- package/coverage/lcov-report/block-navigation.js +0 -87
- package/coverage/lcov-report/favicon.png +0 -0
- package/coverage/lcov-report/index.html +0 -191
- package/coverage/lcov-report/prettify.css +0 -1
- package/coverage/lcov-report/prettify.js +0 -2
- package/coverage/lcov-report/sort-arrow-sprite.png +0 -0
- package/coverage/lcov-report/sorter.js +0 -210
- package/coverage/lcov.info +0 -1937
- package/coverage/prettify.css +0 -1
- package/coverage/prettify.js +0 -2
- package/coverage/sort-arrow-sprite.png +0 -0
- package/coverage/sorter.js +0 -210
- package/docs/API_ENDPOINTS_FOR_DETECTION.md +0 -647
- package/docs/API_RETRY_MECHANISM.md +0 -338
- package/docs/ARELA_IDENTIFY_IMPLEMENTATION.md +0 -489
- package/docs/ARELA_IDENTIFY_QUICKREF.md +0 -186
- package/docs/ARELA_PROPAGATE_IMPLEMENTATION.md +0 -581
- package/docs/ARELA_PROPAGATE_QUICKREF.md +0 -272
- package/docs/ARELA_PUSH_IMPLEMENTATION.md +0 -577
- package/docs/ARELA_PUSH_QUICKREF.md +0 -322
- package/docs/ARELA_SCAN_IMPLEMENTATION.md +0 -373
- package/docs/ARELA_SCAN_QUICKREF.md +0 -139
- package/docs/CROSS_PLATFORM_PATH_HANDLING.md +0 -597
- package/docs/DETECTION_ATTEMPT_TRACKING.md +0 -414
- package/docs/MIGRATION_UPLOADER_TO_FILE_STATS.md +0 -1020
- package/docs/MULTI_LEVEL_DIRECTORY_SCANNING.md +0 -494
- package/docs/QUICK_REFERENCE_API_DETECTION.md +0 -264
- package/docs/REFACTORING_SUMMARY_DETECT_PEDIMENTOS.md +0 -200
- package/docs/STATS_COMMAND_SEQUENCE_DIAGRAM.md +0 -287
- package/docs/STATS_COMMAND_SIMPLE.md +0 -93
|
@@ -36,17 +36,44 @@ import path from 'path';
|
|
|
36
36
|
|
|
37
37
|
import { FieldResult } from '../document-type-shared.js';
|
|
38
38
|
|
|
39
|
+
// Compile cache for string patterns (from DB matchers): patterns are stable
// across every document in a run, so each one is compiled once instead of once
// per document. Patterns that fail to compile are cached as null and treated
// as a non-match (parity with the TS engine).
const regexCache = new Map();
const REGEX_CACHE_MAX = 5000;

/**
 * Resolve a clue's pattern to a RegExp that can be reused with `.test()`.
 *
 * @param {{ pattern?: RegExp | string | null, flags?: string | null }} clue
 *   Clue whose `pattern` is either an already-compiled RegExp or a pattern
 *   source string, with optional `flags` for the string form.
 * @returns {RegExp | null} The compiled expression, or null when the clue has
 *   no pattern or the pattern/flags do not compile (callers treat null as a
 *   non-match).
 */
function toRegExp(clue) {
  const { pattern } = clue;

  if (pattern instanceof RegExp) {
    // Already compiled; reset lastIndex so a g/y flag cannot make repeated
    // .test() calls stateful across documents.
    pattern.lastIndex = 0;
    return pattern;
  }

  // `new RegExp(undefined)` compiles to /(?:)/, which matches every input.
  // A clue without a pattern must be a non-match, not a universal hit.
  if (pattern == null) {
    return null;
  }

  // Strip g/y so the cached instance is never stateful. String() keeps a
  // non-string `flags` value from throwing here; the RegExp constructor will
  // reject it below and the clue becomes a non-match.
  const safeFlags = String(clue.flags ?? '').replace(/[gy]/g, '');
  const source = String(pattern);

  // JSON-encode the pair so the flags/pattern boundary is unambiguous. A plain
  // "flags pattern" join lets ('i x', 'y') collide with ('i', 'x y'), so the
  // null cached for the invalid clue would poison the valid one.
  const key = JSON.stringify([safeFlags, source]);

  let re = regexCache.get(key);
  if (re === undefined) {
    try {
      re = new RegExp(source, safeFlags);
    } catch {
      // Invalid pattern or flags: remember the failure so it is not retried.
      re = null;
    }
    // Crude bound on memory: drop everything once the cap is reached.
    if (regexCache.size >= REGEX_CACHE_MAX) {
      regexCache.clear();
    }
    regexCache.set(key, re);
  }
  return re;
}
|
|
44
66
|
|
|
67
|
+
// Upper bound on the length of text handed to a single regex. Real extracted
// document text is far below this; the cap only bounds pathological/crafted
// inputs so an allowed (quadratic) pattern can't blow up on a megabyte-scale
// adversarial string.
const MATCH_INPUT_CAP = 1_000_000;

/**
 * Select the text a clue is tested against, truncated to MATCH_INPUT_CAP.
 *
 * @param {{ kind?: string }} clue - Clue whose `kind` picks the target.
 * @param {{ fileName?: string | null, source?: string | null }} ctx - Document
 *   context supplying the file name and the content.
 * @returns {string} `ctx.fileName` for FILENAME_REGEX clues, `ctx.source` for
 *   every other kind; '' when the chosen field is null/undefined.
 */
function clueTarget(clue, ctx) {
  let text;
  if (clue.kind === 'FILENAME_REGEX') {
    // Only this kind looks at the file name.
    text = ctx.fileName ?? '';
  } else {
    // Every other kind is matched against the content.
    text = ctx.source ?? '';
  }

  if (text.length > MATCH_INPUT_CAP) {
    return text.slice(0, MATCH_INPUT_CAP);
  }
  return text;
}
|
|
51
78
|
|
|
52
79
|
/**
|
|
@@ -73,7 +100,8 @@ export function scoreMatcher(matcher, ctx) {
|
|
|
73
100
|
|
|
74
101
|
for (const clue of matcher.clues ?? []) {
|
|
75
102
|
const weight = clue.weight ?? 1;
|
|
76
|
-
const
|
|
103
|
+
const re = toRegExp(clue);
|
|
104
|
+
const hit = re ? re.test(clueTarget(clue, ctx)) : false;
|
|
77
105
|
|
|
78
106
|
if (clue.negative) {
|
|
79
107
|
if (hit) {
|
package/.vscode/settings.json
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{}
|