data_redactor 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: cb5ea859ac53fad02d55edfe902ed084c2e344fdae54261ea96ea4cf84fb97ac
4
- data.tar.gz: b1989344c14e784949e1130478305c4d01f1f3771b44ba4f41eb2f2ca42f6d4f
3
+ metadata.gz: 0c2837740c12c4424fe8837a023c6043badc61c94c99a4b53a826e57f60c362f
4
+ data.tar.gz: fe57411af8c23f54a462a2c71d76e8e3306adb35958001a3918e817e5bceeabc
5
5
  SHA512:
6
- metadata.gz: 39ef5f9ca77539d7a1c7a6cb3c430dbd4ef7f068b67b57dbe72d8df051d36496193388440930cc5cce8e309f6760313e52b5073d3d342157841c4026210b8772
7
- data.tar.gz: a40435e0034cd18d28679fa384de8ae6f9e4b3dcbdc2d5e0d14e85b85add720f4a13a9b2919ea0782fdc2a3b42c31a5c5c1515c6671e4462e74e99ccef89b273
6
+ metadata.gz: d882f103607569f259bd8bba0cb10a34c1967564bbe81909c6ba877ccecd020b53efc8df350ab08b19169239b4be276a5bb28c54d8559a17914d10044d2aa4d3
7
+ data.tar.gz: b567563554f6f8549c9207b43596a3aba7ef33bb44a6efab4df02a8e3de851c06caf2afe253a7b0f01ac1d37b4808e4c99a5f4b421d395169b7525eabc26ad3f
data/CHANGELOG.md CHANGED
@@ -7,6 +7,24 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ## [0.6.0] - 2026-05-08
11
+
12
+ ### Added
13
+ - **Per-pattern allow / deny via `only:` / `except:`.** Both kwargs now accept a mix of Symbols (tags) and Strings (pattern names from `DataRedactor.pattern_names`). They can be combined: `only: :contact, except: ["email"]` redacts every contact pattern except email. Mixed-list shapes like `only: [:credentials, "iban_de"]` also work. Precedence: `except:` always wins when the two overlap.
14
+ - `DataRedactor.pattern_names` — array of every known pattern name (built-ins + currently registered custom).
15
+ - `DataRedactor::BUILTIN_PATTERN_NAMES` and `DataRedactor::BUILTIN_PATTERN_TAG_BITS` constants (frozen) exposing the compiled-in pattern roster.
16
+ - `DataRedactor::UnknownPatternError` raised when a String passed to `only:`/`except:` does not match any known pattern.
17
+ - YARD docs deploy job in `.github/workflows/ci.yml` publishes `bundle exec yard doc` output to GitHub Pages on every push to `main`.
18
+
19
+ ### Changed
20
+ - **C entry-point signatures.** `_redact(text, ph_mode, ph_str, enable_bits)` and `_scan(text, enable_bits)` now take a per-pattern enable bit array (built by the Ruby wrapper from `only:`/`except:`) instead of a tag bitmask. The public `DataRedactor.redact` / `.scan` API is fully backward compatible — only the underscore-prefixed C boundary changed. Single-pass: filtering happens in C, no second pass through `_scan`.
21
+ - `only:` and `except:` may now be combined (previously raised `ArgumentError` if both were passed).
22
+ - **Internal: C extension split into focused modules.** `ext/data_redactor/data_redactor.c` was a single ~1000-line file; it is now a 60-line entry point plus `patterns.{c,h}`, `placeholder.{c,h}`, `redact.{c,h}`, `scan.{c,h}`, `custom_patterns.{c,h}`, and `tags.h`. `extconf.rb` now globs every `.c` in the extension directory via `$srcs`, so adding a new module needs no Makefile edits.
23
+ - **YARD inline docs** — every public method on `DataRedactor` now has `@param`/`@return`/`@raise` annotations (100% coverage); `.yardopts` configures markdown rendering with the README as the front page.
24
+
25
+ ### Documentation
26
+ - README: gem version / CI / license badges; new "Thread safety" section clarifying that `redact`/`scan` are thread-safe but `add_pattern`/`remove_pattern`/`clear_custom_patterns!` are not (register custom patterns once at boot).
27
+
10
28
  ## [0.5.0] - 2026-05-02
11
29
 
12
30
  ### Added
@@ -62,6 +80,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
62
80
  - `DataRedactor.redact(text)` module function returning the input with every match replaced by `[REDACTED]`.
63
81
  - RSpec suite with one example per pattern.
64
82
 
65
- [Unreleased]: https://github.com/danielefrisanco/data_redactor/compare/v0.2.0...HEAD
83
+ [Unreleased]: https://github.com/danielefrisanco/data_redactor/compare/v0.6.0...HEAD
84
+ [0.6.0]: https://github.com/danielefrisanco/data_redactor/compare/v0.5.0...v0.6.0
66
85
  [0.2.0]: https://github.com/danielefrisanco/data_redactor/compare/v0.1.0...v0.2.0
67
86
  [0.1.0]: https://github.com/danielefrisanco/data_redactor/releases/tag/v0.1.0
@@ -0,0 +1,123 @@
1
+ #include "custom_patterns.h"
2
+ #include "redact.h" /* wrap_boundary */
3
+ #include <string.h>
4
+ #include <stdlib.h>
5
+
6
+ custom_pattern_t *custom_patterns = NULL;
7
+ int custom_count = 0;
8
+ int custom_cap = 0;
9
+
10
+ static int find_custom_by_name(const char *name) {
11
+ for (int i = 0; i < custom_count; i++) {
12
+ if (strcmp(custom_patterns[i].name, name) == 0) return i;
13
+ }
14
+ return -1;
15
+ }
16
+
17
+ static void free_custom_at(int idx) {
18
+ free(custom_patterns[idx].name);
19
+ free(custom_patterns[idx].source);
20
+ regfree(&custom_patterns[idx].compiled);
21
+ }
22
+
23
+ VALUE rb_add_pattern(VALUE self, VALUE rb_name, VALUE rb_source,
24
+ VALUE rb_tag_bit, VALUE rb_boundary) {
25
+ Check_Type(rb_name, T_STRING);
26
+ Check_Type(rb_source, T_STRING);
27
+
28
+ const char *name = StringValueCStr(rb_name);
29
+ const char *source = StringValueCStr(rb_source);
30
+ int tag_bit = NUM2INT(rb_tag_bit);
31
+ int boundary = NUM2INT(rb_boundary);
32
+
33
+ char *pat_to_compile;
34
+ char *wrapped = NULL;
35
+ if (boundary) {
36
+ wrapped = wrap_boundary(source);
37
+ if (!wrapped) rb_raise(rb_eNoMemError, "wrap_boundary allocation failed");
38
+ pat_to_compile = wrapped;
39
+ } else {
40
+ pat_to_compile = (char *)source;
41
+ }
42
+
43
+ regex_t compiled;
44
+ int ret = regcomp(&compiled, pat_to_compile, REG_EXTENDED);
45
+ free(wrapped);
46
+
47
+ if (ret != 0) {
48
+ char errbuf[256];
49
+ regerror(ret, &compiled, errbuf, sizeof(errbuf));
50
+ regfree(&compiled);
51
+ VALUE eClass = rb_const_get(rb_define_module("DataRedactor"),
52
+ rb_intern("InvalidPatternError"));
53
+ rb_raise(eClass, "%s", errbuf);
54
+ }
55
+
56
+ int idx = find_custom_by_name(name);
57
+ if (idx >= 0) {
58
+ free_custom_at(idx);
59
+ } else {
60
+ if (custom_count >= custom_cap) {
61
+ int new_cap = custom_cap == 0 ? 8 : custom_cap * 2;
62
+ custom_pattern_t *tmp = (custom_pattern_t *)realloc(
63
+ custom_patterns, sizeof(custom_pattern_t) * new_cap);
64
+ if (!tmp) {
65
+ regfree(&compiled);
66
+ rb_raise(rb_eNoMemError, "custom_patterns realloc failed");
67
+ }
68
+ custom_patterns = tmp;
69
+ custom_cap = new_cap;
70
+ }
71
+ idx = custom_count++;
72
+ }
73
+
74
+ custom_patterns[idx].name = strdup(name);
75
+ custom_patterns[idx].source = strdup(source);
76
+ custom_patterns[idx].compiled = compiled;
77
+ custom_patterns[idx].tag = tag_bit;
78
+ custom_patterns[idx].boundary = boundary;
79
+
80
+ if (!custom_patterns[idx].name || !custom_patterns[idx].source) {
81
+ rb_raise(rb_eNoMemError, "strdup failed");
82
+ }
83
+
84
+ return Qnil;
85
+ }
86
+
87
+ VALUE rb_remove_pattern(VALUE self, VALUE rb_name) {
88
+ Check_Type(rb_name, T_STRING);
89
+ const char *name = StringValueCStr(rb_name);
90
+
91
+ int idx = find_custom_by_name(name);
92
+ if (idx < 0) return Qfalse;
93
+
94
+ free_custom_at(idx);
95
+
96
+ for (int i = idx; i < custom_count - 1; i++) {
97
+ custom_patterns[i] = custom_patterns[i + 1];
98
+ }
99
+ custom_count--;
100
+
101
+ return Qtrue;
102
+ }
103
+
104
+ VALUE rb_clear_custom_patterns(VALUE self) {
105
+ for (int i = 0; i < custom_count; i++) {
106
+ free_custom_at(i);
107
+ }
108
+ custom_count = 0;
109
+ return Qnil;
110
+ }
111
+
112
+ VALUE rb_custom_patterns(VALUE self) {
113
+ VALUE arr = rb_ary_new_capa(custom_count);
114
+ for (int i = 0; i < custom_count; i++) {
115
+ VALUE h = rb_hash_new();
116
+ rb_hash_aset(h, ID2SYM(rb_intern("name")), rb_str_new_cstr(custom_patterns[i].name));
117
+ rb_hash_aset(h, ID2SYM(rb_intern("source")), rb_str_new_cstr(custom_patterns[i].source));
118
+ rb_hash_aset(h, ID2SYM(rb_intern("tag_bit")), INT2NUM(custom_patterns[i].tag));
119
+ rb_hash_aset(h, ID2SYM(rb_intern("boundary")), custom_patterns[i].boundary ? Qtrue : Qfalse);
120
+ rb_ary_push(arr, h);
121
+ }
122
+ return arr;
123
+ }
@@ -0,0 +1,25 @@
1
+ #ifndef DATA_REDACTOR_CUSTOM_PATTERNS_H
2
+ #define DATA_REDACTOR_CUSTOM_PATTERNS_H
3
+
4
+ #include <ruby.h>
5
+ #include <regex.h>
6
+
7
/* One user-registered pattern as stored in the custom pattern registry. */
typedef struct {
    char *name;       /* pattern name; heap string released with the entry */
    char *source;     /* original POSIX ERE string, kept for introspection */
    regex_t compiled; /* compiled form of the pattern */
    int tag;          /* TAG_* bit */
    int boundary;     /* non-zero if compiled with the boundary wrapper */
} custom_pattern_t;
14
+
15
+ extern custom_pattern_t *custom_patterns;
16
+ extern int custom_count;
17
+ extern int custom_cap;
18
+
19
+ VALUE rb_add_pattern(VALUE self, VALUE rb_name, VALUE rb_source,
20
+ VALUE rb_tag_bit, VALUE rb_boundary);
21
+ VALUE rb_remove_pattern(VALUE self, VALUE rb_name);
22
+ VALUE rb_clear_custom_patterns(VALUE self);
23
+ VALUE rb_custom_patterns(VALUE self);
24
+
25
+ #endif