@arela/uploader 1.0.24 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. package/docs/AUTO_PROCESSING_PIPELINE.md +258 -0
  2. package/docs/COMPLETE_USAGE_GUIDE.md +1363 -0
  3. package/docs/DATABASESERVICE_IMPROVEMENTS.md +546 -0
  4. package/docs/PASO_2_TEST_RESULTS.md +298 -0
  5. package/docs/PASO_3_PLAN.md +385 -0
  6. package/docs/PHASE_1_FILE_DETECTION.md +366 -0
  7. package/docs/PHASE_2_API_INTEGRATION.md +426 -0
  8. package/docs/PHASE_3_DATABASE_MANAGEMENT.md +480 -0
  9. package/docs/PHASE_4_FILE_OPERATIONS.md +448 -0
  10. package/docs/PHASE_5_WATCH_MODE.md +450 -0
  11. package/docs/PHASE_6_SIGNAL_HANDLING.md +472 -0
  12. package/docs/PHASE_7_ADVANCED_FEATURES.md +560 -0
  13. package/docs/PLAN_WATCH_FEATURE.md +417 -0
  14. package/docs/README.md +480 -0
  15. package/docs/SCHEMA_ALIGNMENT_SUMMARY.md +301 -0
  16. package/docs/SMARTWATCH_DATABASE_REFACTORING.md +181 -0
  17. package/docs/SMART_WATCH_DATABASE_CHANGES.md +502 -0
  18. package/docs/TESTING_WATCH_MODE.md +212 -0
  19. package/docs/WATCHER_API_IMPLEMENTATION.md +520 -0
  20. package/docs/WATCHER_API_INTEGRATION.md +562 -0
  21. package/docs/WATCHER_SETUP_GUIDE.md +614 -0
  22. package/docs/WATCH_ARCHITECTURE.md +395 -0
  23. package/docs/WATCH_AUTO_PIPELINE.md +334 -0
  24. package/docs/WATCH_CONFIGURATION.md +267 -0
  25. package/docs/WATCH_USAGE_GUIDE.md +567 -0
  26. package/docs/commands.md +14 -0
  27. package/package.json +1 -1
  28. package/src/commands/IdentifyCommand.js +8 -0
  29. package/src/config/config.js +2 -2
  30. package/src/file-detection.js +42 -1
  31. package/src/scoring/scoring-engine.js +35 -7
  32. package/.vscode/settings.json +0 -1
  33. package/coverage/IdentifyCommand.js.html +0 -1462
  34. package/coverage/PropagateCommand.js.html +0 -1507
  35. package/coverage/PushCommand.js.html +0 -1504
  36. package/coverage/ScanCommand.js.html +0 -1654
  37. package/coverage/UploadCommand.js.html +0 -1846
  38. package/coverage/WatchCommand.js.html +0 -4111
  39. package/coverage/base.css +0 -224
  40. package/coverage/block-navigation.js +0 -87
  41. package/coverage/favicon.png +0 -0
  42. package/coverage/index.html +0 -191
  43. package/coverage/lcov-report/IdentifyCommand.js.html +0 -1462
  44. package/coverage/lcov-report/PropagateCommand.js.html +0 -1507
  45. package/coverage/lcov-report/PushCommand.js.html +0 -1504
  46. package/coverage/lcov-report/ScanCommand.js.html +0 -1654
  47. package/coverage/lcov-report/UploadCommand.js.html +0 -1846
  48. package/coverage/lcov-report/WatchCommand.js.html +0 -4111
  49. package/coverage/lcov-report/base.css +0 -224
  50. package/coverage/lcov-report/block-navigation.js +0 -87
  51. package/coverage/lcov-report/favicon.png +0 -0
  52. package/coverage/lcov-report/index.html +0 -191
  53. package/coverage/lcov-report/prettify.css +0 -1
  54. package/coverage/lcov-report/prettify.js +0 -2
  55. package/coverage/lcov-report/sort-arrow-sprite.png +0 -0
  56. package/coverage/lcov-report/sorter.js +0 -210
  57. package/coverage/lcov.info +0 -1937
  58. package/coverage/prettify.css +0 -1
  59. package/coverage/prettify.js +0 -2
  60. package/coverage/sort-arrow-sprite.png +0 -0
  61. package/coverage/sorter.js +0 -210
  62. package/docs/API_ENDPOINTS_FOR_DETECTION.md +0 -647
  63. package/docs/API_RETRY_MECHANISM.md +0 -338
  64. package/docs/ARELA_IDENTIFY_IMPLEMENTATION.md +0 -489
  65. package/docs/ARELA_IDENTIFY_QUICKREF.md +0 -186
  66. package/docs/ARELA_PROPAGATE_IMPLEMENTATION.md +0 -581
  67. package/docs/ARELA_PROPAGATE_QUICKREF.md +0 -272
  68. package/docs/ARELA_PUSH_IMPLEMENTATION.md +0 -577
  69. package/docs/ARELA_PUSH_QUICKREF.md +0 -322
  70. package/docs/ARELA_SCAN_IMPLEMENTATION.md +0 -373
  71. package/docs/ARELA_SCAN_QUICKREF.md +0 -139
  72. package/docs/CROSS_PLATFORM_PATH_HANDLING.md +0 -597
  73. package/docs/DETECTION_ATTEMPT_TRACKING.md +0 -414
  74. package/docs/MIGRATION_UPLOADER_TO_FILE_STATS.md +0 -1020
  75. package/docs/MULTI_LEVEL_DIRECTORY_SCANNING.md +0 -494
  76. package/docs/QUICK_REFERENCE_API_DETECTION.md +0 -264
  77. package/docs/REFACTORING_SUMMARY_DETECT_PEDIMENTOS.md +0 -200
  78. package/docs/STATS_COMMAND_SEQUENCE_DIAGRAM.md +0 -287
  79. package/docs/STATS_COMMAND_SIMPLE.md +0 -93
@@ -36,17 +36,44 @@ import path from 'path';
36
36
 
37
37
  import { FieldResult } from '../document-type-shared.js';
38
38
 
39
+ // Compile cache for string patterns (from DB matchers): stable across every
40
+ // document in a run, so compile once instead of per document. Invalid patterns
41
+ // cache as null and are treated as a non-match (parity with the TS engine).
42
+ const regexCache = new Map();
43
+ const REGEX_CACHE_MAX = 5000;
44
+
39
45
  function toRegExp(clue) {
40
- return clue.pattern instanceof RegExp
41
- ? clue.pattern
42
- : new RegExp(clue.pattern, clue.flags ?? '');
46
+ if (clue.pattern instanceof RegExp) {
47
+ // Local-seed patterns are already compiled; reset lastIndex so a g/y flag
48
+ // can't make repeated .test() calls stateful across documents.
49
+ clue.pattern.lastIndex = 0;
50
+ return clue.pattern;
51
+ }
52
+ const safeFlags = (clue.flags ?? '').replace(/[gy]/g, '');
53
+ const key = `${safeFlags} ${clue.pattern}`;
54
+ let re = regexCache.get(key);
55
+ if (re === undefined) {
56
+ try {
57
+ re = new RegExp(clue.pattern, safeFlags);
58
+ } catch {
59
+ re = null;
60
+ }
61
+ if (regexCache.size >= REGEX_CACHE_MAX) regexCache.clear();
62
+ regexCache.set(key, re);
63
+ }
64
+ return re;
43
65
  }
44
66
 
67
+ // Cap the text a single regex runs on. Real extracted document text is far
68
+ // below this; the cap only bounds pathological/crafted inputs so an allowed
69
+ // (quadratic) pattern can't blow up on a megabyte-scale adversarial string.
70
+ const MATCH_INPUT_CAP = 1_000_000;
71
+
45
72
  function clueTarget(clue, ctx) {
46
73
  // FILENAME_REGEX tests the file name; every other kind tests the content.
47
- return clue.kind === 'FILENAME_REGEX'
48
- ? (ctx.fileName ?? '')
49
- : (ctx.source ?? '');
74
+ const raw =
75
+ clue.kind === 'FILENAME_REGEX' ? (ctx.fileName ?? '') : (ctx.source ?? '');
76
+ return raw.length > MATCH_INPUT_CAP ? raw.slice(0, MATCH_INPUT_CAP) : raw;
50
77
  }
51
78
 
52
79
  /**
@@ -73,7 +100,8 @@ export function scoreMatcher(matcher, ctx) {
73
100
 
74
101
  for (const clue of matcher.clues ?? []) {
75
102
  const weight = clue.weight ?? 1;
76
- const hit = toRegExp(clue).test(clueTarget(clue, ctx));
103
+ const re = toRegExp(clue);
104
+ const hit = re ? re.test(clueTarget(clue, ctx)) : false;
77
105
 
78
106
  if (clue.negative) {
79
107
  if (hit) {
@@ -1 +0,0 @@
1
- {}