@arela/uploader 1.0.24 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80)
  1. package/docs/AUTO_PROCESSING_PIPELINE.md +258 -0
  2. package/docs/COMPLETE_USAGE_GUIDE.md +1363 -0
  3. package/docs/DATABASESERVICE_IMPROVEMENTS.md +546 -0
  4. package/docs/PASO_2_TEST_RESULTS.md +298 -0
  5. package/docs/PASO_3_PLAN.md +385 -0
  6. package/docs/PHASE_1_FILE_DETECTION.md +366 -0
  7. package/docs/PHASE_2_API_INTEGRATION.md +426 -0
  8. package/docs/PHASE_3_DATABASE_MANAGEMENT.md +480 -0
  9. package/docs/PHASE_4_FILE_OPERATIONS.md +448 -0
  10. package/docs/PHASE_5_WATCH_MODE.md +450 -0
  11. package/docs/PHASE_6_SIGNAL_HANDLING.md +472 -0
  12. package/docs/PHASE_7_ADVANCED_FEATURES.md +560 -0
  13. package/docs/PLAN_WATCH_FEATURE.md +417 -0
  14. package/docs/README.md +480 -0
  15. package/docs/SCHEMA_ALIGNMENT_SUMMARY.md +301 -0
  16. package/docs/SMARTWATCH_DATABASE_REFACTORING.md +181 -0
  17. package/docs/SMART_WATCH_DATABASE_CHANGES.md +502 -0
  18. package/docs/TESTING_WATCH_MODE.md +212 -0
  19. package/docs/WATCHER_API_IMPLEMENTATION.md +520 -0
  20. package/docs/WATCHER_API_INTEGRATION.md +562 -0
  21. package/docs/WATCHER_SETUP_GUIDE.md +614 -0
  22. package/docs/WATCH_ARCHITECTURE.md +395 -0
  23. package/docs/WATCH_AUTO_PIPELINE.md +334 -0
  24. package/docs/WATCH_CONFIGURATION.md +267 -0
  25. package/docs/WATCH_USAGE_GUIDE.md +567 -0
  26. package/docs/commands.md +14 -0
  27. package/package.json +1 -1
  28. package/src/commands/IdentifyCommand.js +11 -0
  29. package/src/config/config.js +2 -2
  30. package/src/file-detection.js +42 -1
  31. package/src/scoring/scoring-engine.js +40 -7
  32. package/src/services/LoggingService.js +5 -3
  33. package/.vscode/settings.json +0 -1
  34. package/coverage/IdentifyCommand.js.html +0 -1462
  35. package/coverage/PropagateCommand.js.html +0 -1507
  36. package/coverage/PushCommand.js.html +0 -1504
  37. package/coverage/ScanCommand.js.html +0 -1654
  38. package/coverage/UploadCommand.js.html +0 -1846
  39. package/coverage/WatchCommand.js.html +0 -4111
  40. package/coverage/base.css +0 -224
  41. package/coverage/block-navigation.js +0 -87
  42. package/coverage/favicon.png +0 -0
  43. package/coverage/index.html +0 -191
  44. package/coverage/lcov-report/IdentifyCommand.js.html +0 -1462
  45. package/coverage/lcov-report/PropagateCommand.js.html +0 -1507
  46. package/coverage/lcov-report/PushCommand.js.html +0 -1504
  47. package/coverage/lcov-report/ScanCommand.js.html +0 -1654
  48. package/coverage/lcov-report/UploadCommand.js.html +0 -1846
  49. package/coverage/lcov-report/WatchCommand.js.html +0 -4111
  50. package/coverage/lcov-report/base.css +0 -224
  51. package/coverage/lcov-report/block-navigation.js +0 -87
  52. package/coverage/lcov-report/favicon.png +0 -0
  53. package/coverage/lcov-report/index.html +0 -191
  54. package/coverage/lcov-report/prettify.css +0 -1
  55. package/coverage/lcov-report/prettify.js +0 -2
  56. package/coverage/lcov-report/sort-arrow-sprite.png +0 -0
  57. package/coverage/lcov-report/sorter.js +0 -210
  58. package/coverage/lcov.info +0 -1937
  59. package/coverage/prettify.css +0 -1
  60. package/coverage/prettify.js +0 -2
  61. package/coverage/sort-arrow-sprite.png +0 -0
  62. package/coverage/sorter.js +0 -210
  63. package/docs/API_ENDPOINTS_FOR_DETECTION.md +0 -647
  64. package/docs/API_RETRY_MECHANISM.md +0 -338
  65. package/docs/ARELA_IDENTIFY_IMPLEMENTATION.md +0 -489
  66. package/docs/ARELA_IDENTIFY_QUICKREF.md +0 -186
  67. package/docs/ARELA_PROPAGATE_IMPLEMENTATION.md +0 -581
  68. package/docs/ARELA_PROPAGATE_QUICKREF.md +0 -272
  69. package/docs/ARELA_PUSH_IMPLEMENTATION.md +0 -577
  70. package/docs/ARELA_PUSH_QUICKREF.md +0 -322
  71. package/docs/ARELA_SCAN_IMPLEMENTATION.md +0 -373
  72. package/docs/ARELA_SCAN_QUICKREF.md +0 -139
  73. package/docs/CROSS_PLATFORM_PATH_HANDLING.md +0 -597
  74. package/docs/DETECTION_ATTEMPT_TRACKING.md +0 -414
  75. package/docs/MIGRATION_UPLOADER_TO_FILE_STATS.md +0 -1020
  76. package/docs/MULTI_LEVEL_DIRECTORY_SCANNING.md +0 -494
  77. package/docs/QUICK_REFERENCE_API_DETECTION.md +0 -264
  78. package/docs/REFACTORING_SUMMARY_DETECT_PEDIMENTOS.md +0 -200
  79. package/docs/STATS_COMMAND_SEQUENCE_DIAGRAM.md +0 -287
  80. package/docs/STATS_COMMAND_SIMPLE.md +0 -93
@@ -36,17 +36,49 @@ import path from 'path';
36
36
 
37
37
  import { FieldResult } from '../document-type-shared.js';
38
38
 
39
+ // Compile cache for string patterns (from DB matchers): stable across every
40
+ // document in a run, so compile once instead of per document. Invalid patterns
41
+ // cache as null and are treated as a non-match (parity with the TS engine).
42
+ const regexCache = new Map();
43
+ const REGEX_CACHE_MAX = 5000;
44
+
39
45
  function toRegExp(clue) {
40
- return clue.pattern instanceof RegExp
41
- ? clue.pattern
42
- : new RegExp(clue.pattern, clue.flags ?? '');
46
+ if (clue.pattern instanceof RegExp) {
47
+ // Local-seed patterns are already compiled; reset lastIndex so a g/y flag
48
+ // can't make repeated .test() calls stateful across documents.
49
+ clue.pattern.lastIndex = 0;
50
+ return clue.pattern;
51
+ }
52
+ const safeFlags = (clue.flags ?? '').replace(/[gy]/g, '');
53
+ const key = `${safeFlags} ${clue.pattern}`;
54
+ let re = regexCache.get(key);
55
+ if (re === undefined) {
56
+ try {
57
+ re = new RegExp(clue.pattern, safeFlags);
58
+ } catch {
59
+ re = null;
60
+ }
61
+ if (regexCache.size >= REGEX_CACHE_MAX) regexCache.clear();
62
+ regexCache.set(key, re);
63
+ }
64
+ return re;
43
65
  }
44
66
 
67
+ // Bound the text a single regex runs on so an allowed (quadratic) pattern
68
+ // can't blow up on a megabyte-scale adversarial string. Real documents can
69
+ // legitimately exceed the cap (edocument XMLs embed multi-MB base64 PDFs with
70
+ // signature tags AFTER the blob), so instead of truncating we sample a HEAD +
71
+ // TAIL window — signatures live at the extremes, the blob in the middle.
72
+ // (Parity with the API TS engine.)
73
+ const MATCH_HEAD_CAP = 1_000_000;
74
+ const MATCH_TAIL_CAP = 262_144;
75
+
45
76
  function clueTarget(clue, ctx) {
46
77
  // FILENAME_REGEX tests the file name; every other kind tests the content.
47
- return clue.kind === 'FILENAME_REGEX'
48
- ? (ctx.fileName ?? '')
49
- : (ctx.source ?? '');
78
+ const raw =
79
+ clue.kind === 'FILENAME_REGEX' ? (ctx.fileName ?? '') : (ctx.source ?? '');
80
+ if (raw.length <= MATCH_HEAD_CAP + MATCH_TAIL_CAP) return raw;
81
+ return `${raw.slice(0, MATCH_HEAD_CAP)}\n${raw.slice(-MATCH_TAIL_CAP)}`;
50
82
  }
51
83
 
52
84
  /**
@@ -73,7 +105,8 @@ export function scoreMatcher(matcher, ctx) {
73
105
 
74
106
  for (const clue of matcher.clues ?? []) {
75
107
  const weight = clue.weight ?? 1;
76
- const hit = toRegExp(clue).test(clueTarget(clue, ctx));
108
+ const re = toRegExp(clue);
109
+ const hit = re ? re.test(clueTarget(clue, ctx)) : false;
77
110
 
78
111
  if (clue.negative) {
79
112
  if (hit) {
@@ -81,6 +81,10 @@ export class LoggingService {
81
81
  */
82
82
  info(message) {
83
83
  this.writeLog(message, 'info');
84
+ // Echo to console: run banners, stats and the 🧩 observability lines were
85
+ // file-only, so operators watching the terminal never saw them (the v1
86
+ // canary summary was invisible until someone grepped arela-upload.log).
87
+ console.log(message);
84
88
  }
85
89
 
86
90
  /**
@@ -89,9 +93,7 @@ export class LoggingService {
89
93
  */
90
94
  warn(message) {
91
95
  this.writeLog(message, 'warn');
92
- if (this.isVerbose) {
93
- console.warn(`⚠️ ${message}`);
94
- }
96
+ console.warn(`⚠️ ${message}`);
95
97
  }
96
98
 
97
99
  /**
@@ -1 +0,0 @@
1
- {}