siftly 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +6 -0
- data/LICENSE +21 -0
- data/README.md +512 -0
- data/lib/siftly/aggregators.rb +20 -0
- data/lib/siftly/checker.rb +49 -0
- data/lib/siftly/config.rb +73 -0
- data/lib/siftly/errors.rb +11 -0
- data/lib/siftly/filter.rb +45 -0
- data/lib/siftly/filter_config.rb +55 -0
- data/lib/siftly/filter_result.rb +45 -0
- data/lib/siftly/instrumentation.rb +12 -0
- data/lib/siftly/pipeline.rb +139 -0
- data/lib/siftly/registry.rb +53 -0
- data/lib/siftly/result.rb +31 -0
- data/lib/siftly/version.rb +6 -0
- data/lib/siftly.rb +35 -0
- data/siftly.gemspec +25 -0
- metadata +90 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: a7542071989804851d01823da767383c5e8fb19a0f7af0964cc240ba040d8a35
|
|
4
|
+
data.tar.gz: 35e5c8553f42537ab0c6fb5437c240b909869e71fbe505e00a1334ee7de3fa39
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 1011fb8411cce769bcdf8f90e3efc8b10e4291b97945744bdc5ab5f8c126a618bf5ff6befce490621d46fb443943466e820c05f497a7521101ddb4ceb29cb825
|
|
7
|
+
data.tar.gz: 0d71948ed52554568fa9e62738fdc017062807d682bea8b7ffe08fe07faa03ce94d880ddc9619af1e19b2642493780008acd496e70480223e2004320eb7ab107
|
data/CHANGELOG.md
ADDED
data/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Tomos Rees
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
data/README.md
ADDED
|
@@ -0,0 +1,512 @@
|
|
|
1
|
+
# Siftly
|
|
2
|
+
|
|
3
|
+
`siftly` is the core runtime. It does not ship opinionated spam rules. It gives you:
|
|
4
|
+
|
|
5
|
+
- a filter contract
|
|
6
|
+
- a registry
|
|
7
|
+
- pipeline configuration
|
|
8
|
+
- result objects
|
|
9
|
+
- aggregation and failure handling
|
|
10
|
+
|
|
11
|
+
Use this gem directly if you are writing your own filters. Add one or more plugin gems if you want ready-made heuristics.
|
|
12
|
+
|
|
13
|
+
## Installation
|
|
14
|
+
|
|
15
|
+
```ruby
|
|
16
|
+
gem "siftly"
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
## Usage
|
|
20
|
+
|
|
21
|
+
Load the core gem plus any plugin gems you want to use, configure the active filters once, then call `Siftly.check`.
|
|
22
|
+
|
|
23
|
+
```ruby
|
|
24
|
+
require "siftly"
|
|
25
|
+
require "siftly/content"
|
|
26
|
+
require "siftly/links"
|
|
27
|
+
|
|
28
|
+
Siftly.configure do |config|
|
|
29
|
+
config.aggregator = :score
|
|
30
|
+
config.threshold = 1.0
|
|
31
|
+
config.failure_mode = :record
|
|
32
|
+
|
|
33
|
+
config.use :keyword_pack
|
|
34
|
+
config.use :shortener_link
|
|
35
|
+
|
|
36
|
+
config.filter :keyword_pack do |filter|
|
|
37
|
+
filter.keywords = ["seo agency", "buy backlinks", "guest post"]
|
|
38
|
+
filter.weight = 0.4
|
|
39
|
+
end
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
result = Siftly.check(
|
|
43
|
+
value: "Our SEO agency can buy backlinks. Details: https://bit.ly/demo",
|
|
44
|
+
attribute: :message,
|
|
45
|
+
context: { source: "contact_form" }
|
|
46
|
+
)
|
|
47
|
+
|
|
48
|
+
result.spam? # => true
|
|
49
|
+
result.score # => 1.3
|
|
50
|
+
result.reasons # => ["Matched 2 configured keyword terms", "Submission contains shortened URLs"]
|
|
51
|
+
result.matches.map(&:filter) # => [:keyword_pack, :shortener_link]
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
## Configuration Reference
|
|
55
|
+
|
|
56
|
+
`Siftly.configure` yields a `Siftly::Config` object. These are the supported global settings.
|
|
57
|
+
|
|
58
|
+
### `config.use(key)`
|
|
59
|
+
|
|
60
|
+
Enables a filter for all future `Siftly.check` calls unless that call passes an explicit `filters:` list.
|
|
61
|
+
|
|
62
|
+
Accepted values:
|
|
63
|
+
|
|
64
|
+
- a symbol such as `:keyword_pack`
|
|
65
|
+
- a string such as `"keyword_pack"`
|
|
66
|
+
|
|
67
|
+
Behavior:
|
|
68
|
+
|
|
69
|
+
- keys are normalized to symbols
|
|
70
|
+
- duplicate calls are ignored
|
|
71
|
+
- order is preserved, and filters run in that order
|
|
72
|
+
|
|
73
|
+
### `config.filter(key) { |filter| ... }`
|
|
74
|
+
|
|
75
|
+
Creates or updates a `Siftly::FilterConfig` for the named filter.
|
|
76
|
+
|
|
77
|
+
Accepted values for `key`:
|
|
78
|
+
|
|
79
|
+
- a symbol
|
|
80
|
+
- a string
|
|
81
|
+
|
|
82
|
+
Inside the block, `filter` is a mutable `Siftly::FilterConfig`.
|
|
83
|
+
|
|
84
|
+
You can set arbitrary keys on it:
|
|
85
|
+
|
|
86
|
+
```ruby
|
|
87
|
+
config.filter :keyword_pack do |filter|
|
|
88
|
+
filter.keywords = ["seo agency", "buy backlinks"]
|
|
89
|
+
filter.weight = 0.4
|
|
90
|
+
filter.min_hits = 2
|
|
91
|
+
end
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
Important detail:
|
|
95
|
+
|
|
96
|
+
- the core gem does not validate per-filter settings
|
|
97
|
+
- each filter decides which keys it reads
|
|
98
|
+
- unknown settings are simply stored and ignored unless the filter uses them
|
|
99
|
+
|
|
100
|
+
### `config.aggregator`
|
|
101
|
+
|
|
102
|
+
Controls how filter results are combined into the final spam decision.
|
|
103
|
+
|
|
104
|
+
Default:
|
|
105
|
+
|
|
106
|
+
- `:score`
|
|
107
|
+
|
|
108
|
+
Accepted values:
|
|
109
|
+
|
|
110
|
+
- `:score`
|
|
111
|
+
- `:weighted` as an alias for `:score`
|
|
112
|
+
- `:any`
|
|
113
|
+
- any object responding to `call(filter_results:, threshold:, context:)`
|
|
114
|
+
|
|
115
|
+
Built-in behavior:
|
|
116
|
+
|
|
117
|
+
- `:score` and `:weighted` sum all filter scores and mark spam when `score >= threshold`
|
|
118
|
+
- `:any` marks spam when any filter matches
|
|
119
|
+
|
|
120
|
+
Custom aggregator contract:
|
|
121
|
+
|
|
122
|
+
```ruby
|
|
123
|
+
class MyAggregator
|
|
124
|
+
def call(filter_results:, threshold:, context:)
|
|
125
|
+
{ spam: true_or_false, score: numeric_score }
|
|
126
|
+
end
|
|
127
|
+
end
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
If `aggregator` is set to anything else, Siftly raises `Siftly::InvalidAggregatorError` when a check runs; the value is not validated at assignment time.
|
|
131
|
+
|
|
132
|
+
### `config.threshold`
|
|
133
|
+
|
|
134
|
+
Controls the spam cutoff used by score-based aggregation.
|
|
135
|
+
|
|
136
|
+
Default:
|
|
137
|
+
|
|
138
|
+
- `1.0`
|
|
139
|
+
|
|
140
|
+
Accepted values:
|
|
141
|
+
|
|
142
|
+
- any numeric value that can be converted with `to_f`
|
|
143
|
+
|
|
144
|
+
Notes:
|
|
145
|
+
|
|
146
|
+
- it matters for `:score`, `:weighted`, and most custom aggregators
|
|
147
|
+
- it does not affect the built-in `:any` aggregator
|
|
148
|
+
|
|
149
|
+
### `config.failure_mode`
|
|
150
|
+
|
|
151
|
+
Controls what happens when a filter raises an exception.
|
|
152
|
+
|
|
153
|
+
Default:
|
|
154
|
+
|
|
155
|
+
- `:record`
|
|
156
|
+
|
|
157
|
+
Accepted values:
|
|
158
|
+
|
|
159
|
+
- `:record`
|
|
160
|
+
- `:open`
|
|
161
|
+
- `:closed`
|
|
162
|
+
- `:raise`
|
|
163
|
+
|
|
164
|
+
Behavior:
|
|
165
|
+
|
|
166
|
+
- `:record` records the filter error, continues the pipeline, and treats that filter as a non-match with score `0.0`
|
|
167
|
+
- `:open` currently behaves the same as `:record`
|
|
168
|
+
- `:closed` records the filter error, forces that filter to match, and gives it a score equal to the pipeline threshold
|
|
169
|
+
- `:raise` re-raises the original exception immediately
|
|
170
|
+
|
|
171
|
+
If `failure_mode` is set to anything else, Siftly raises `Siftly::ConfigurationError` when a check runs; the value is not validated at assignment time.
|
|
172
|
+
|
|
173
|
+
### `config.instrumenter`
|
|
174
|
+
|
|
175
|
+
Receives instrumentation events from the pipeline.
|
|
176
|
+
|
|
177
|
+
Default:
|
|
178
|
+
|
|
179
|
+
- `nil`
|
|
180
|
+
|
|
181
|
+
Accepted values:
|
|
182
|
+
|
|
183
|
+
- `nil`
|
|
184
|
+
- any object responding to `instrument(event, payload = {})`
|
|
185
|
+
|
|
186
|
+
Events emitted by the pipeline:
|
|
187
|
+
|
|
188
|
+
- `siftly.filter.started`
|
|
189
|
+
- `siftly.filter.finished`
|
|
190
|
+
- `siftly.pipeline.completed`
|
|
191
|
+
|
|
192
|
+
`payload` is a hash and varies by event. For example, `siftly.filter.finished` includes fields such as `filter`, `attribute`, `matched`, `score`, `duration_ms`, and `error` when applicable.
|
|
193
|
+
|
|
194
|
+
### `config.enabled_filters`
|
|
195
|
+
|
|
196
|
+
Read-only array of enabled filter keys.
|
|
197
|
+
|
|
198
|
+
Default:
|
|
199
|
+
|
|
200
|
+
- `[]`
|
|
201
|
+
|
|
202
|
+
Values returned:
|
|
203
|
+
|
|
204
|
+
- an array of symbols in execution order
|
|
205
|
+
|
|
206
|
+
### `config.filter_config_for(key)`
|
|
207
|
+
|
|
208
|
+
Returns a copy of the current `Siftly::FilterConfig` for that filter.
|
|
209
|
+
|
|
210
|
+
Accepted values for `key`:
|
|
211
|
+
|
|
212
|
+
- a symbol
|
|
213
|
+
- a string
|
|
214
|
+
|
|
215
|
+
Return behavior:
|
|
216
|
+
|
|
217
|
+
- returns an existing config copy if one has been set
|
|
218
|
+
- returns an empty config object for that key if one has not
|
|
219
|
+
|
|
220
|
+
### `config.filter_configs`
|
|
221
|
+
|
|
222
|
+
Returns a hash of all configured filter configs.
|
|
223
|
+
|
|
224
|
+
Return shape:
|
|
225
|
+
|
|
226
|
+
```ruby
|
|
227
|
+
{
|
|
228
|
+
keyword_pack: #<Siftly::FilterConfig ...>,
|
|
229
|
+
shortener_link: #<Siftly::FilterConfig ...>
|
|
230
|
+
}
|
|
231
|
+
```
|
|
232
|
+
|
|
233
|
+
### `config.dup`
|
|
234
|
+
|
|
235
|
+
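Returns an independent copy of the configuration. The enabled-filter list and each filter's settings hash are duplicated, but the setting values themselves and the instrumenter are shared with the original.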
|
|
236
|
+
|
|
237
|
+
This duplicates:
|
|
238
|
+
|
|
239
|
+
- enabled filters
|
|
240
|
+
- filter configs
|
|
241
|
+
- aggregator
|
|
242
|
+
- threshold
|
|
243
|
+
- failure mode
|
|
244
|
+
- instrumenter reference
|
|
245
|
+
|
|
246
|
+
## `Siftly.check` Reference
|
|
247
|
+
|
|
248
|
+
`Siftly.check` accepts the following keyword arguments.
|
|
249
|
+
|
|
250
|
+
### `value:`
|
|
251
|
+
|
|
252
|
+
Required.
|
|
253
|
+
|
|
254
|
+
Accepted values:
|
|
255
|
+
|
|
256
|
+
- any object
|
|
257
|
+
|
|
258
|
+
Behavior:
|
|
259
|
+
|
|
260
|
+
- Siftly passes it to each filter as-is
|
|
261
|
+
- most filters call `to_s`, but that is filter-specific
|
|
262
|
+
|
|
263
|
+
### `attribute:`
|
|
264
|
+
|
|
265
|
+
Optional.
|
|
266
|
+
|
|
267
|
+
Accepted values:
|
|
268
|
+
|
|
269
|
+
- `nil`
|
|
270
|
+
- typically a symbol such as `:email` or `:message`
|
|
271
|
+
- strings also work if the filter handles them
|
|
272
|
+
|
|
273
|
+
Default:
|
|
274
|
+
|
|
275
|
+
- `nil`
|
|
276
|
+
|
|
277
|
+
### `record:`
|
|
278
|
+
|
|
279
|
+
Optional.
|
|
280
|
+
|
|
281
|
+
Accepted values:
|
|
282
|
+
|
|
283
|
+
- `nil`
|
|
284
|
+
- any object, typically a model or form object
|
|
285
|
+
|
|
286
|
+
Default:
|
|
287
|
+
|
|
288
|
+
- `nil`
|
|
289
|
+
|
|
290
|
+
### `context:`
|
|
291
|
+
|
|
292
|
+
Optional.
|
|
293
|
+
|
|
294
|
+
Accepted values:
|
|
295
|
+
|
|
296
|
+
- a hash
|
|
297
|
+
|
|
298
|
+
Default:
|
|
299
|
+
|
|
300
|
+
- `{}`
|
|
301
|
+
|
|
302
|
+
Use this for request metadata and external signals such as:
|
|
303
|
+
|
|
304
|
+
- IP address
|
|
305
|
+
- user agent
|
|
306
|
+
- form timing
|
|
307
|
+
- honeypot values
|
|
308
|
+
- precomputed fingerprints
|
|
309
|
+
|
|
310
|
+
### `filters:`
|
|
311
|
+
|
|
312
|
+
Optional.
|
|
313
|
+
|
|
314
|
+
Accepted values:
|
|
315
|
+
|
|
316
|
+
- `nil`
|
|
317
|
+
- an array of symbols or strings
|
|
318
|
+
|
|
319
|
+
Default:
|
|
320
|
+
|
|
321
|
+
- `nil`, which means "use `config.enabled_filters`"
|
|
322
|
+
|
|
323
|
+
Behavior:
|
|
324
|
+
|
|
325
|
+
- keys are normalized to symbols
|
|
326
|
+
- passing `filters:` replaces the globally enabled filter list for that call
|
|
327
|
+
|
|
328
|
+
### `filter_overrides:`
|
|
329
|
+
|
|
330
|
+
Optional.
|
|
331
|
+
|
|
332
|
+
Accepted values:
|
|
333
|
+
|
|
334
|
+
- a hash keyed by filter symbol or string
|
|
335
|
+
|
|
336
|
+
Default:
|
|
337
|
+
|
|
338
|
+
- `{}`
|
|
339
|
+
|
|
340
|
+
Example:
|
|
341
|
+
|
|
342
|
+
```ruby
|
|
343
|
+
Siftly.check(
|
|
344
|
+
value: "special phrase",
|
|
345
|
+
filters: [:keyword_pack],
|
|
346
|
+
filter_overrides: {
|
|
347
|
+
keyword_pack: {
|
|
348
|
+
keywords: ["special phrase"],
|
|
349
|
+
weight: 0.9
|
|
350
|
+
}
|
|
351
|
+
}
|
|
352
|
+
)
|
|
353
|
+
```
|
|
354
|
+
|
|
355
|
+
Behavior:
|
|
356
|
+
|
|
357
|
+
- overrides are merged into the configured `FilterConfig` for that call only
|
|
358
|
+
- string and symbol keys are both supported for filter names
|
|
359
|
+
|
|
360
|
+
### `aggregator:`
|
|
361
|
+
|
|
362
|
+
Optional per-call override for `config.aggregator`.
|
|
363
|
+
|
|
364
|
+
Accepted values:
|
|
365
|
+
|
|
366
|
+
- `:score`
|
|
367
|
+
- `:weighted`
|
|
368
|
+
- `:any`
|
|
369
|
+
- a custom aggregator object
|
|
370
|
+
|
|
371
|
+
Default:
|
|
372
|
+
|
|
373
|
+
- the configured global aggregator
|
|
374
|
+
|
|
375
|
+
### `threshold:`
|
|
376
|
+
|
|
377
|
+
Optional per-call override for `config.threshold`.
|
|
378
|
+
|
|
379
|
+
Accepted values:
|
|
380
|
+
|
|
381
|
+
- any numeric value convertible with `to_f`
|
|
382
|
+
|
|
383
|
+
Default:
|
|
384
|
+
|
|
385
|
+
- the configured global threshold
|
|
386
|
+
|
|
387
|
+
### `failure_mode:`
|
|
388
|
+
|
|
389
|
+
Optional per-call override for `config.failure_mode`.
|
|
390
|
+
|
|
391
|
+
Accepted values:
|
|
392
|
+
|
|
393
|
+
- `:record`
|
|
394
|
+
- `:open`
|
|
395
|
+
- `:closed`
|
|
396
|
+
- `:raise`
|
|
397
|
+
|
|
398
|
+
Default:
|
|
399
|
+
|
|
400
|
+
- the configured global failure mode
|
|
401
|
+
|
|
402
|
+
### `instrumenter:`
|
|
403
|
+
|
|
404
|
+
Optional per-call override for `config.instrumenter`.
|
|
405
|
+
|
|
406
|
+
Accepted values:
|
|
407
|
+
|
|
408
|
+
- `nil`
|
|
409
|
+
- any object responding to `instrument(event, payload = {})`
|
|
410
|
+
|
|
411
|
+
Default:
|
|
412
|
+
|
|
413
|
+
- the configured global instrumenter
|
|
414
|
+
|
|
415
|
+
## `Siftly::FilterConfig` Reference
|
|
416
|
+
|
|
417
|
+
`Siftly::FilterConfig` stores per-filter settings.
|
|
418
|
+
|
|
419
|
+
Supported methods:
|
|
420
|
+
|
|
421
|
+
- `key` returns the filter key as a symbol
|
|
422
|
+
- `config[:setting_name]` reads a stored setting
|
|
423
|
+
- `config.fetch(:setting_name, default)` reads a stored setting with fallback
|
|
424
|
+
- dynamic writers such as `config.weight = 0.7`
|
|
425
|
+
- dynamic readers such as `config.weight`
|
|
426
|
+
- `to_h` returns a copy of the settings hash
|
|
427
|
+
- `merge(overrides)` returns a new `FilterConfig` with merged overrides
|
|
428
|
+
|
|
429
|
+
Accepted setting values:
|
|
430
|
+
|
|
431
|
+
- any Ruby object
|
|
432
|
+
|
|
433
|
+
The core gem does not impose a schema here. Plugin filters define their own supported keys.
|
|
434
|
+
|
|
435
|
+
## Writing a filter
|
|
436
|
+
|
|
437
|
+
Subclass `Siftly::Filter`, call `register_as`, and return `result(...)` from `#call`.
|
|
438
|
+
|
|
439
|
+
```ruby
|
|
440
|
+
require "siftly"
|
|
441
|
+
|
|
442
|
+
class BlocklistFilter < Siftly::Filter
|
|
443
|
+
register_as :blocklist
|
|
444
|
+
|
|
445
|
+
def call(value:, attribute: nil, record: nil, context: {})
|
|
446
|
+
blocked = Array(config.fetch(:blocked_terms, []))
|
|
447
|
+
matched_terms = blocked.select { |term| value.to_s.downcase.include?(term.downcase) }
|
|
448
|
+
|
|
449
|
+
result(
|
|
450
|
+
matched: matched_terms.any?,
|
|
451
|
+
score: matched_terms.any? ? config.fetch(:weight, 1.0).to_f : 0.0,
|
|
452
|
+
reason: matched_terms.any? ? "Matched blocked terms" : nil,
|
|
453
|
+
metadata: { matched_terms: matched_terms, attribute: attribute, context_keys: context.keys }
|
|
454
|
+
)
|
|
455
|
+
end
|
|
456
|
+
end
|
|
457
|
+
|
|
458
|
+
Siftly::Registry.register(BlocklistFilter)
|
|
459
|
+
```
|
|
460
|
+
|
|
461
|
+
## Results
|
|
462
|
+
|
|
463
|
+
`Siftly.check` returns `Siftly::Result`.
|
|
464
|
+
|
|
465
|
+
- `spam?` tells you whether the pipeline classified the input as spam
|
|
466
|
+
- `score` is the summed score from all filter results
|
|
467
|
+
- `matches` returns only the matched `Siftly::FilterResult` objects
|
|
468
|
+
- `errors` returns only filters that failed
|
|
469
|
+
- `reasons` is a flat list of non-nil match reasons
|
|
470
|
+
- `filter_results` contains every filter result, matched or not
|
|
471
|
+
|
|
472
|
+
Each `Siftly::FilterResult` exposes:
|
|
473
|
+
|
|
474
|
+
- `filter`
|
|
475
|
+
- `matched?`
|
|
476
|
+
- `score`
|
|
477
|
+
- `reason`
|
|
478
|
+
- `metadata`
|
|
479
|
+
- `error?`
|
|
480
|
+
|
|
481
|
+
Additional result fields:
|
|
482
|
+
|
|
483
|
+
- `Siftly::Result#attribute`
|
|
484
|
+
- `Siftly::Result#value_preview`
|
|
485
|
+
- `Siftly::Result#threshold`
|
|
486
|
+
- `Siftly::Result#aggregator`
|
|
487
|
+
|
|
488
|
+
Additional filter result fields:
|
|
489
|
+
|
|
490
|
+
- `Siftly::FilterResult#error`
|
|
491
|
+
- `Siftly::FilterResult#duration_ms`
|
|
492
|
+
|
|
493
|
+
## Utility Methods
|
|
494
|
+
|
|
495
|
+
### `Siftly.config`
|
|
496
|
+
|
|
497
|
+
Returns the current global `Siftly::Config` instance.
|
|
498
|
+
|
|
499
|
+
### `Siftly.reset_configuration!`
|
|
500
|
+
|
|
501
|
+
Resets the global configuration back to defaults:
|
|
502
|
+
|
|
503
|
+
- no enabled filters
|
|
504
|
+
- no filter configs
|
|
505
|
+
- `aggregator = :score`
|
|
506
|
+
- `threshold = 1.0`
|
|
507
|
+
- `failure_mode = :record`
|
|
508
|
+
- `instrumenter = nil`
|
|
509
|
+
|
|
510
|
+
## Plugin loading
|
|
511
|
+
|
|
512
|
+
Plugin gems register their filters when required. If a check fails with an unknown filter error, you either forgot to add the plugin gem or forgot to require it.
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Siftly
  # Built-in strategies that turn per-filter results into a final verdict.
  #
  # Each aggregator responds to +call(filter_results:, threshold:, context:)+
  # and returns a Hash with a +:spam+ flag and a +:score+ total.
  module Aggregators
    # Flags the input as spam when at least one filter result matched.
    # +threshold+ and +context+ are accepted for interface parity only.
    class Any
      def call(filter_results:, threshold:, context:)
        total = filter_results.sum { |filter_result| filter_result.score }
        flagged = filter_results.any? { |filter_result| filter_result.matched? }

        { spam: flagged, score: total }
      end
    end

    # Flags the input as spam when the summed filter scores reach the
    # threshold (converted with +to_f+). +context+ is unused.
    class Score
      def call(filter_results:, threshold:, context:)
        total = filter_results.sum { |filter_result| filter_result.score }
        cutoff = threshold.to_f

        { spam: total >= cutoff, score: total }
      end
    end
  end
end
|
|
20
|
+
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Siftly
  # Resolves per-call options against a configuration object and runs a
  # Pipeline with the resulting settings.
  class Checker
    # config - object providing enabled_filters, filter_config_for,
    #          aggregator, threshold, failure_mode and instrumenter.
    def initialize(config:)
      @config = config
    end

    # Runs the configured filters against +value+.
    #
    # filters          - optional list of filter keys replacing the enabled
    #                    filters for this call.
    # filter_overrides - Hash keyed by filter name (Symbol or String) whose
    #                    values are merged into that filter's config.
    # aggregator, threshold, failure_mode, instrumenter - per-call overrides;
    #                    a nil/false value falls back to the configuration.
    #
    # Returns whatever Pipeline#call returns.
    def call(value:, attribute: nil, record: nil, context: {}, filters: nil, filter_overrides: {},
             aggregator: nil, threshold: nil, failure_mode: nil, instrumenter: nil)
      keys = resolve_filters(filters)
      configs_by_key = build_filter_configs(keys, filter_overrides)

      pipeline = Pipeline.new(
        filter_keys: keys,
        filter_configs: configs_by_key,
        aggregator: aggregator || config.aggregator,
        threshold: threshold || config.threshold,
        failure_mode: failure_mode || config.failure_mode,
        instrumenter: instrumenter || config.instrumenter
      )

      pipeline.call(value: value, attribute: attribute, record: record, context: context)
    end

    private

    attr_reader :config

    # Explicit filters win over the globally enabled ones; keys become symbols.
    def resolve_filters(filters)
      selected = filters || config.enabled_filters
      selected.map { |name| name.to_sym }
    end

    # Builds a { key => config } Hash, merging any per-call overrides.
    # A symbol-keyed override takes precedence over a string-keyed one.
    def build_filter_configs(filter_keys, filter_overrides)
      pairs = filter_keys.map do |filter_key|
        string_keyed = filter_overrides.fetch(filter_key.to_s, {})
        overrides = filter_overrides.fetch(filter_key, string_keyed)

        [filter_key, config.filter_config_for(filter_key).merge(overrides)]
      end

      pairs.to_h
    end
  end
end
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Siftly
  # Mutable settings container: the ordered list of enabled filters, the
  # per-filter configs, and the aggregator/threshold/failure-mode/instrumenter
  # values. Nothing is validated here beyond the shape of filter configs.
  class Config
    attr_accessor :aggregator, :threshold, :failure_mode, :instrumenter
    attr_reader :enabled_filters

    # enabled_filters - keys convertible with to_sym.
    # filter_configs  - { key => Hash | FilterConfig }.
    #
    # Raises ConfigurationError when a filter config is neither a Hash nor a
    # FilterConfig.
    def initialize(enabled_filters: [], filter_configs: {}, aggregator: :score, threshold: 1.0,
                   failure_mode: :record, instrumenter: nil)
      @enabled_filters = enabled_filters.map(&:to_sym)
      @filter_configs = normalize_filter_configs(filter_configs)
      @aggregator = aggregator
      @threshold = threshold
      @failure_mode = failure_mode
      @instrumenter = instrumenter
    end

    # Enables a filter, preserving first-seen order and ignoring repeats.
    # Returns the normalized Symbol key.
    def use(key)
      key.to_sym.tap do |symbol|
        @enabled_filters.push(symbol) unless @enabled_filters.include?(symbol)
      end
    end

    # Yields (when a block is given) and returns the stored, mutable
    # FilterConfig for +key+, creating it on first use.
    def filter(key)
      entry = (@filter_configs[key.to_sym] ||= FilterConfig.new(key))
      yield entry if block_given?
      entry
    end

    # Returns a copy of the stored FilterConfig, or a fresh empty one when
    # nothing has been configured for +key+.
    def filter_config_for(key)
      stored = @filter_configs[key.to_sym]
      return FilterConfig.new(key) unless stored

      stored.merge({})
    end

    # Returns a new Hash of copied FilterConfig objects keyed by Symbol.
    def filter_configs
      @filter_configs.each_with_object({}) do |(name, stored), copies|
        copies[name] = stored.merge({})
      end
    end

    # Builds a new Config from copies of the filter list and settings hashes;
    # the scalar options and the instrumenter are passed through as-is.
    def dup
      settings_by_key = filter_configs.transform_values { |filter_config| filter_config.to_h }

      self.class.new(
        enabled_filters: enabled_filters.dup,
        filter_configs: settings_by_key,
        aggregator: aggregator,
        threshold: threshold,
        failure_mode: failure_mode,
        instrumenter: instrumenter
      )
    end

    private

    # Converts every entry to a FilterConfig stored under a Symbol key.
    def normalize_filter_configs(filter_configs)
      pairs = filter_configs.map do |key, value|
        [key.to_sym, coerce_filter_config(key, value)]
      end

      pairs.to_h
    end

    # Copies a FilterConfig, wraps a Hash, and rejects anything else.
    def coerce_filter_config(key, value)
      case value
      when FilterConfig then value.merge({})
      when Hash then FilterConfig.new(key, value)
      else raise ConfigurationError, "expected filter config for #{key.inspect} to be a Hash or FilterConfig"
      end
    end
  end
end
|
|
73
|
+
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Siftly
  # Root of the library's exception hierarchy; rescue this to catch any
  # Siftly-specific failure.
  class Error < StandardError
  end

  # Raised for invalid configuration values.
  class ConfigurationError < Error
  end

  # Raised when a filter does not honour the filter contract.
  class InvalidFilterError < Error
  end

  # Raised when a filter key is registered more than once.
  class DuplicateFilterError < Error
  end

  # Raised when a filter key cannot be found in the registry.
  class UnknownFilterError < Error
  end

  # Raised when the configured aggregator is not supported.
  class InvalidAggregatorError < Error
  end
end
|
|
11
|
+
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Siftly
  # Base class for filters. Subclasses declare a key with +register_as+,
  # implement +#call+, and build their return value with +result+.
  class Filter
    # Key declared through +register_as+; nil when none was declared on this
    # exact class (the value lives in a class-level instance variable).
    def self.key
      @key
    end

    # Declares the key for this filter class and returns it as a Symbol.
    def self.register_as(key)
      @key = key.to_sym
    end

    # config - a FilterConfig (copied) or a Hash (wrapped in a FilterConfig
    #          keyed by the class key, or :unknown when no key is declared).
    #
    # Raises ConfigurationError for any other type.
    def initialize(config: {})
      @config =
        case config
        when FilterConfig then config.merge({})
        when Hash then FilterConfig.new(self.class.key || :unknown, config)
        else raise ConfigurationError, "filter config must be a Hash or FilterConfig"
        end
    end

    # Abstract entry point; subclasses must override it.
    def call(value:, attribute: nil, record: nil, context: {})
      raise NotImplementedError, "#{self.class} must implement #call"
    end

    private

    attr_reader :config

    # Builds a FilterResult tagged with this filter's class key.
    def result(matched:, score: 0.0, reason: nil, metadata: {}, error: nil)
      attributes = {
        filter: self.class.key,
        matched: matched,
        score: score,
        reason: reason,
        metadata: metadata,
        error: error
      }

      FilterResult.new(**attributes)
    end
  end
end
|
|
45
|
+
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Siftly
  # Schema-less settings bag for a single filter.
  #
  # Settings are stored under Symbol keys and can be accessed through
  # +[]+ / +[]=+, +fetch+, or dynamic accessors (+config.weight = 0.4+,
  # +config.weight+). Reading an unknown setting through a dynamic reader
  # returns nil.
  class FilterConfig
    # Only identifier-style names ending in "=" are treated as dynamic
    # writers, so operators such as +<=+ or +[]=+ are never mistaken for one.
    WRITER_PATTERN = /\A[A-Za-z_]\w*=\z/
    private_constant :WRITER_PATTERN

    attr_reader :key

    # key      - filter key, converted with to_sym.
    # settings - Hash of initial settings; keys are converted with to_sym.
    def initialize(key, settings = {})
      @key = key.to_sym
      @settings = symbolize_keys(settings)
    end

    # Returns the stored value for +name+, or nil when it is not set.
    def [](name)
      @settings[name.to_sym]
    end

    # Stores +value+ under +name+. Defined explicitly because the dynamic
    # writer fallback would otherwise receive "[]=" and store the setting
    # name as a value under the key :"[]".
    def []=(name, value)
      @settings[name.to_sym] = value
    end

    # Same contract as Hash#fetch: returns the stored value, else the
    # positional fallback, else the block's result, else raises KeyError.
    def fetch(name, *fallback, &block)
      @settings.fetch(name.to_sym, *fallback, &block)
    end

    # Returns a shallow copy of the settings Hash.
    def to_h
      @settings.dup
    end

    # Returns a new FilterConfig with +overrides+ layered on top; the
    # receiver is left untouched.
    def merge(overrides)
      self.class.new(key, @settings.merge(symbolize_keys(overrides)))
    end

    # Dynamic accessors: +name=+ writes a setting, a bare +name+ reads one.
    # Calls with a block, operator-like names, or readers given arguments
    # fall through to the default NoMethodError.
    def method_missing(method_name, *arguments, &block)
      return super if block

      if writer?(method_name)
        @settings[method_name.to_s.delete_suffix("=").to_sym] = arguments.fetch(0)
      elsif arguments.empty?
        @settings[method_name]
      else
        super
      end
    end

    # Advertises dynamic writers and readers for settings that exist.
    def respond_to_missing?(method_name, include_private = false)
      writer?(method_name) || @settings.key?(method_name.to_sym) || super
    end

    private

    # True when +method_name+ looks like an attribute writer (e.g. :weight=).
    def writer?(method_name)
      WRITER_PATTERN.match?(method_name.to_s)
    end

    # Returns a new Hash with every key converted with to_sym.
    def symbolize_keys(hash)
      hash.each_with_object({}) do |(key, value), result|
        result[key.to_sym] = value
      end
    end
  end
end
|
|
55
|
+
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Siftly
  # Outcome of running one filter: whether it matched, its score, an optional
  # reason, free-form metadata, and optional error/duration details.
  class FilterResult
    attr_reader :filter, :score, :reason, :metadata, :error, :duration_ms

    # filter is converted with to_sym, score with to_f, and metadata is
    # duplicated so later changes to the caller's Hash do not leak in.
    def initialize(filter:, matched:, score:, reason: nil, metadata: {}, error: nil, duration_ms: nil)
      @filter = filter.to_sym
      @matched = matched
      @score = score.to_f
      @reason = reason
      @metadata = metadata.dup
      @error = error
      @duration_ms = duration_ms
    end

    # Returns the matched flag exactly as it was supplied.
    def matched?
      @matched
    end

    # True when an error value was recorded.
    def error?
      !error.nil?
    end

    # Returns a new result in which the given fields replace the current
    # ones; omitted fields keep their current values.
    def with(filter: @filter, matched: @matched, score: @score, reason: @reason, metadata: @metadata,
             error: @error, duration_ms: @duration_ms)
      attributes = {
        filter: filter,
        matched: matched,
        score: score,
        reason: reason,
        metadata: metadata,
        error: error,
        duration_ms: duration_ms
      }

      self.class.new(**attributes)
    end
  end
end
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Siftly
|
|
4
|
+
class Pipeline
|
|
5
|
+
VALID_FAILURE_MODES = %i[record open closed raise].freeze
|
|
6
|
+
|
|
7
|
+
def initialize(filter_keys:, filter_configs:, aggregator:, threshold:, failure_mode:, instrumenter:)
|
|
8
|
+
@filter_keys = filter_keys.map(&:to_sym)
|
|
9
|
+
@filter_configs = filter_configs
|
|
10
|
+
@aggregator = aggregator
|
|
11
|
+
@threshold = threshold.to_f
|
|
12
|
+
@failure_mode = normalize_failure_mode(failure_mode)
|
|
13
|
+
@instrumenter = instrumenter || Instrumentation::NullInstrumenter.new
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
def call(value:, attribute: nil, record: nil, context: {})
|
|
17
|
+
filter_results = filter_keys.map do |filter_key|
|
|
18
|
+
run_filter(filter_key, value: value, attribute: attribute, record: record, context: context)
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
aggregation = resolve_aggregator.call(
|
|
22
|
+
filter_results: filter_results,
|
|
23
|
+
threshold: threshold,
|
|
24
|
+
context: { value: value, attribute: attribute, record: record, context: context }
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
result = Result.new(
|
|
28
|
+
spam: aggregation.fetch(:spam),
|
|
29
|
+
score: aggregation.fetch(:score),
|
|
30
|
+
filter_results: filter_results,
|
|
31
|
+
reasons: filter_results.map(&:reason).compact,
|
|
32
|
+
attribute: attribute,
|
|
33
|
+
value_preview: preview(value),
|
|
34
|
+
threshold: threshold,
|
|
35
|
+
aggregator: aggregator_name
|
|
36
|
+
)
|
|
37
|
+
|
|
38
|
+
instrumenter.instrument(
|
|
39
|
+
"siftly.pipeline.completed",
|
|
40
|
+
attribute: attribute,
|
|
41
|
+
filter_count: filter_keys.length,
|
|
42
|
+
matched_filters: result.matches.map(&:filter),
|
|
43
|
+
spam: result.spam?,
|
|
44
|
+
score: result.score
|
|
45
|
+
)
|
|
46
|
+
|
|
47
|
+
result
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
private
|
|
51
|
+
|
|
52
|
+
attr_reader :filter_keys, :filter_configs, :aggregator, :threshold, :failure_mode, :instrumenter
|
|
53
|
+
|
|
54
|
+
def run_filter(filter_key, value:, attribute:, record:, context:)
|
|
55
|
+
payload = { filter: filter_key, attribute: attribute }
|
|
56
|
+
start = monotonic_time
|
|
57
|
+
filter_class = Registry.resolve(filter_key)
|
|
58
|
+
filter = filter_class.new(config: filter_configs.fetch(filter_key))
|
|
59
|
+
|
|
60
|
+
instrumenter.instrument("siftly.filter.started", payload)
|
|
61
|
+
|
|
62
|
+
filter_result = filter.call(value: value, attribute: attribute, record: record, context: context)
|
|
63
|
+
raise InvalidFilterError, "#{filter_class} must return Siftly::FilterResult" unless filter_result.is_a?(FilterResult)
|
|
64
|
+
|
|
65
|
+
duration = elapsed_ms(start)
|
|
66
|
+
finalized = filter_result.with(duration_ms: duration)
|
|
67
|
+
|
|
68
|
+
instrumenter.instrument("siftly.filter.finished", payload.merge(matched: finalized.matched?, score: finalized.score, duration_ms: duration))
|
|
69
|
+
|
|
70
|
+
finalized
|
|
71
|
+
rescue UnknownFilterError
|
|
72
|
+
raise
|
|
73
|
+
rescue StandardError => error
|
|
74
|
+
raise if failure_mode == :raise
|
|
75
|
+
|
|
76
|
+
duration = elapsed_ms(start)
|
|
77
|
+
failed_result = build_failed_result(filter_key, error, duration)
|
|
78
|
+
|
|
79
|
+
instrumenter.instrument(
|
|
80
|
+
"siftly.filter.finished",
|
|
81
|
+
payload.merge(matched: failed_result.matched?, score: failed_result.score, duration_ms: duration, error: error.message)
|
|
82
|
+
)
|
|
83
|
+
|
|
84
|
+
failed_result
|
|
85
|
+
end
|
|
86
|
+
|
|
87
|
+
def build_failed_result(filter_key, error, duration_ms)
|
|
88
|
+
matched = failure_mode == :closed
|
|
89
|
+
score = matched ? threshold : 0.0
|
|
90
|
+
reason = matched ? "Filter #{filter_key} failed and failure_mode=:closed forced a spam match" : nil
|
|
91
|
+
|
|
92
|
+
FilterResult.new(
|
|
93
|
+
filter: filter_key,
|
|
94
|
+
matched: matched,
|
|
95
|
+
score: score,
|
|
96
|
+
reason: reason,
|
|
97
|
+
metadata: { exception_class: error.class.name },
|
|
98
|
+
error: error.message,
|
|
99
|
+
duration_ms: duration_ms
|
|
100
|
+
)
|
|
101
|
+
end
|
|
102
|
+
|
|
103
|
+
# Turns the configured +aggregator+ into an object that can be invoked.
#
# The symbol +:any+ maps to a new Aggregators::Any; +:score+ and +:weighted+
# both map to a new Aggregators::Score. Any other value is returned as-is
# when it responds to +call+.
#
# @return [Object] an aggregator instance or the configured callable
# @raise [InvalidAggregatorError] when the value is neither a known symbol
#   nor callable
def resolve_aggregator
  case aggregator
  when :any
    Aggregators::Any.new
  when :score, :weighted
    Aggregators::Score.new
  else
    return aggregator if aggregator.respond_to?(:call)

    raise InvalidAggregatorError, "unsupported aggregator #{aggregator.inspect}"
  end
end
|
|
115
|
+
|
|
116
|
+
# Name used to describe the configured aggregator.
#
# Symbols are returned unchanged. For any other object the class name is
# used; anonymous classes have no name (Class#name is nil), so the class's
# +to_s+ is used instead of returning nil.
#
# @return [Symbol, String]
def aggregator_name
  return aggregator if aggregator.is_a?(Symbol)

  aggregator.class.name || aggregator.class.to_s
end
|
|
119
|
+
|
|
120
|
+
# Returns at most the first 80 characters of the value's string form.
#
# @param value [Object] anything responding to +to_s+ (nil yields "")
# @return [String]
def preview(value)
  value.to_s[0, 80]
end
|
|
123
|
+
|
|
124
|
+
# Coerces a failure mode to a Symbol and checks it against
# VALID_FAILURE_MODES.
#
# Values that cannot be converted with +to_sym+ (nil, numbers, ...) are
# rejected with ConfigurationError as well, rather than leaking a
# NoMethodError from the conversion.
#
# @param mode [Symbol, String, Object] requested failure mode
# @return [Symbol] the validated mode
# @raise [ConfigurationError] when the mode is not supported
def normalize_failure_mode(mode)
  candidate = mode.respond_to?(:to_sym) ? mode.to_sym : mode
  return candidate if VALID_FAILURE_MODES.include?(candidate)

  raise ConfigurationError, "unsupported failure mode #{candidate.inspect}"
end
|
|
130
|
+
|
|
131
|
+
# Current reading of the monotonic clock. Used for measuring durations, as
# this clock is not affected by wall-clock adjustments.
#
# @return [Float] seconds (Process.clock_gettime default unit)
def monotonic_time
  Process.clock_gettime(Process::CLOCK_MONOTONIC)
end
|
|
134
|
+
|
|
135
|
+
# Milliseconds elapsed since +start_time+, rounded to three decimals.
#
# @param start_time [Float] an earlier value returned by +monotonic_time+
# @return [Float]
def elapsed_ms(start_time)
  ((monotonic_time - start_time) * 1000.0).round(3)
end
|
|
138
|
+
end
|
|
139
|
+
end
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Siftly
  # Class-level lookup table that maps a filter key to the filter class and
  # the metadata supplied when it was registered.
  class Registry
    class << self
      # Registers a filter class under the key it reports via +key+.
      #
      # The key is normalised with +to_sym+ (when possible) so that it agrees
      # with the lookups below, which all convert their argument with +to_sym+.
      #
      # @param filter_class [Class] a subclass of Siftly::Filter with a key
      # @param replace [Boolean] allow overwriting an existing registration
      # @param metadata [Hash] stored as a frozen shallow copy
      # @return [Class] the registered filter class
      # @raise [InvalidFilterError] when +filter_class+ is not a keyed
      #   Siftly::Filter subclass
      # @raise [DuplicateFilterError] when the key is taken and +replace+ is false
      def register(filter_class, replace: false, metadata: {})
        validate_filter_class!(filter_class)

        key = filter_class.key
        key = key.to_sym if key.respond_to?(:to_sym)
        raise DuplicateFilterError, "filter #{key.inspect} is already registered" if !replace && filters.key?(key)

        filters[key] = { klass: filter_class, metadata: metadata.dup.freeze }
        filter_class
      end

      # @param key [#to_sym]
      # @return [Hash] the registration entry (+:klass+ and +:metadata+)
      # @raise [UnknownFilterError] when nothing is registered under +key+
      def fetch(key)
        filters.fetch(key.to_sym) { raise UnknownFilterError, "unknown filter #{key.inspect}" }
      end

      # @param key [#to_sym]
      # @return [Class] the filter class registered under +key+
      # @raise [UnknownFilterError]
      def resolve(key)
        fetch(key).fetch(:klass)
      end

      # @param key [#to_sym]
      # @return [Hash] the frozen metadata registered under +key+
      # @raise [UnknownFilterError]
      def metadata_for(key)
        fetch(key).fetch(:metadata)
      end

      # @return [Array] all registered keys, sorted
      def available
        filters.keys.sort
      end

      # @param key [#to_sym]
      # @return [Boolean] whether a filter is registered under +key+
      def registered?(key)
        filters.key?(key.to_sym)
      end

      # Drops every registration.
      def clear
        @filters = {}
      end

      private

      # Lazily created backing store: key => { klass:, metadata: }.
      def filters
        @filters ||= {}
      end

      # Rejects anything that is not a keyed subclass of Siftly::Filter.
      # The +is_a?(Class)+ guard keeps non-class arguments (nil, strings,
      # instances) from raising NoMethodError/ArgumentError out of +<+.
      def validate_filter_class!(filter_class)
        unless filter_class.is_a?(Class) && filter_class < Filter
          raise InvalidFilterError, "registered filters must inherit from Siftly::Filter"
        end
        raise InvalidFilterError, "registered filters must call register_as" if filter_class.key.nil?
      end
    end
  end
end
|
|
53
|
+
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Siftly
  # Aggregate outcome of a check: the spam verdict, the combined score and
  # the individual filter results it was derived from.
  class Result
    attr_reader :score, :filter_results, :reasons, :attribute, :value_preview, :threshold, :aggregator

    # @param spam [Object] verdict, exposed through +spam?+
    # @param score [#to_f] stored as a Float
    # @param filter_results [Array] stored as a frozen shallow copy
    # @param reasons [Array] stored frozen with nil entries removed
    # @param attribute [Object]
    # @param value_preview [Object]
    # @param threshold [Object]
    # @param aggregator [Object]
    def initialize(spam:, score:, filter_results:, reasons:, attribute:, value_preview:, threshold:, aggregator:)
      @spam = spam
      @score = score.to_f
      @filter_results = filter_results.dup.freeze
      @reasons = reasons.compact.freeze
      @attribute = attribute
      @value_preview = value_preview
      @threshold = threshold
      @aggregator = aggregator
    end

    # @return [Object] the verdict passed to the constructor
    def spam?
      @spam
    end

    # @return [Array] filter results whose +matched?+ is truthy
    def matches
      filter_results.select(&:matched?)
    end

    # @return [Array] filter results whose +error?+ is truthy
    def errors
      filter_results.select(&:error?)
    end
  end
end
|
|
31
|
+
|
data/lib/siftly.rb
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "siftly/version"
|
|
4
|
+
require_relative "siftly/errors"
|
|
5
|
+
require_relative "siftly/filter_config"
|
|
6
|
+
require_relative "siftly/config"
|
|
7
|
+
require_relative "siftly/filter"
|
|
8
|
+
require_relative "siftly/filter_result"
|
|
9
|
+
require_relative "siftly/result"
|
|
10
|
+
require_relative "siftly/registry"
|
|
11
|
+
require_relative "siftly/instrumentation"
|
|
12
|
+
require_relative "siftly/aggregators"
|
|
13
|
+
require_relative "siftly/pipeline"
|
|
14
|
+
require_relative "siftly/checker"
|
|
15
|
+
|
|
16
|
+
# Top-level convenience API around a single, lazily created Config.
module Siftly
  class << self
    # Yields the shared configuration for mutation and returns it.
    # Calling without a block simply returns the configuration instead of
    # raising LocalJumpError.
    #
    # @yieldparam config [Config]
    # @return [Config]
    def configure
      yield(config) if block_given?
      config
    end

    # @return [Config] the shared configuration, created on first use
    def config
      @config ||= Config.new
    end

    # Runs a check with the shared configuration; all keyword options are
    # forwarded unchanged to Checker#call.
    #
    # @return [Object] whatever Checker#call returns
    def check(**options)
      Checker.new(config: config).call(**options)
    end

    # Replaces the shared configuration with a fresh Config.
    #
    # @return [Config] the new configuration
    def reset_configuration!
      @config = Config.new
    end
  end
end
|
data/siftly.gemspec
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "lib/siftly/version"
|
|
4
|
+
|
|
5
|
+
Gem::Specification.new do |spec|
  spec.name = "siftly"
  spec.version = Siftly::VERSION
  spec.authors = ["Tomos Rees"]

  spec.summary = "Composable spam filtering core for Ruby applications."
  spec.description = "Siftly provides a small, framework-agnostic core for registering spam filters, executing pipelines, and aggregating structured results."
  spec.homepage = "https://github.com/tomosjohnrees/siftly"
  spec.license = "MIT"
  spec.required_ruby_version = ">= 3.2"

  spec.metadata["source_code_uri"] = spec.homepage
  spec.metadata["changelog_uri"] = "#{spec.homepage}/blob/main/CHANGELOG.md"

  # Resolve the glob relative to this gemspec rather than the current working
  # directory, and keep regular files only (the glob also matches directories).
  lib_files = Dir.glob("lib/**/*", base: __dir__).select { |path| File.file?(File.join(__dir__, path)) }
  spec.files = lib_files + %w[CHANGELOG.md LICENSE README.md siftly.gemspec]
  spec.bindir = "exe"
  spec.require_paths = ["lib"]

  spec.add_development_dependency "minitest", "~> 5.0"
  spec.add_development_dependency "rake", "~> 13.0"
end
|
metadata
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: siftly
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Tomos Rees
|
|
8
|
+
autorequire:
|
|
9
|
+
bindir: exe
|
|
10
|
+
cert_chain: []
|
|
11
|
+
date: 2026-03-13 00:00:00.000000000 Z
|
|
12
|
+
dependencies:
|
|
13
|
+
- !ruby/object:Gem::Dependency
|
|
14
|
+
name: minitest
|
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
|
16
|
+
requirements:
|
|
17
|
+
- - "~>"
|
|
18
|
+
- !ruby/object:Gem::Version
|
|
19
|
+
version: '5.0'
|
|
20
|
+
type: :development
|
|
21
|
+
prerelease: false
|
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
23
|
+
requirements:
|
|
24
|
+
- - "~>"
|
|
25
|
+
- !ruby/object:Gem::Version
|
|
26
|
+
version: '5.0'
|
|
27
|
+
- !ruby/object:Gem::Dependency
|
|
28
|
+
name: rake
|
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
|
30
|
+
requirements:
|
|
31
|
+
- - "~>"
|
|
32
|
+
- !ruby/object:Gem::Version
|
|
33
|
+
version: '13.0'
|
|
34
|
+
type: :development
|
|
35
|
+
prerelease: false
|
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
37
|
+
requirements:
|
|
38
|
+
- - "~>"
|
|
39
|
+
- !ruby/object:Gem::Version
|
|
40
|
+
version: '13.0'
|
|
41
|
+
description: Siftly provides a small, framework-agnostic core for registering spam
|
|
42
|
+
filters, executing pipelines, and aggregating structured results.
|
|
43
|
+
email:
|
|
44
|
+
executables: []
|
|
45
|
+
extensions: []
|
|
46
|
+
extra_rdoc_files: []
|
|
47
|
+
files:
|
|
48
|
+
- CHANGELOG.md
|
|
49
|
+
- LICENSE
|
|
50
|
+
- README.md
|
|
51
|
+
- lib/siftly.rb
|
|
52
|
+
- lib/siftly/aggregators.rb
|
|
53
|
+
- lib/siftly/checker.rb
|
|
54
|
+
- lib/siftly/config.rb
|
|
55
|
+
- lib/siftly/errors.rb
|
|
56
|
+
- lib/siftly/filter.rb
|
|
57
|
+
- lib/siftly/filter_config.rb
|
|
58
|
+
- lib/siftly/filter_result.rb
|
|
59
|
+
- lib/siftly/instrumentation.rb
|
|
60
|
+
- lib/siftly/pipeline.rb
|
|
61
|
+
- lib/siftly/registry.rb
|
|
62
|
+
- lib/siftly/result.rb
|
|
63
|
+
- lib/siftly/version.rb
|
|
64
|
+
- siftly.gemspec
|
|
65
|
+
homepage: https://github.com/tomosjohnrees/siftly
|
|
66
|
+
licenses:
|
|
67
|
+
- MIT
|
|
68
|
+
metadata:
|
|
69
|
+
source_code_uri: https://github.com/tomosjohnrees/siftly
|
|
70
|
+
changelog_uri: https://github.com/tomosjohnrees/siftly/blob/main/CHANGELOG.md
|
|
71
|
+
post_install_message:
|
|
72
|
+
rdoc_options: []
|
|
73
|
+
require_paths:
|
|
74
|
+
- lib
|
|
75
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
76
|
+
requirements:
|
|
77
|
+
- - ">="
|
|
78
|
+
- !ruby/object:Gem::Version
|
|
79
|
+
version: '3.2'
|
|
80
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
81
|
+
requirements:
|
|
82
|
+
- - ">="
|
|
83
|
+
- !ruby/object:Gem::Version
|
|
84
|
+
version: '0'
|
|
85
|
+
requirements: []
|
|
86
|
+
rubygems_version: 3.0.3.1
|
|
87
|
+
signing_key:
|
|
88
|
+
specification_version: 4
|
|
89
|
+
summary: Composable spam filtering core for Ruby applications.
|
|
90
|
+
test_files: []
|