selma 0.0.6 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 92fdde974708c332244cc1617d24a07b286588592efd1f17c78016d4d580cbba
4
- data.tar.gz: b69f34c2509f040b28e25cfbb94ba6d607f6603b2e53853e657b05b29bb0c06e
3
+ metadata.gz: 9d12ee9e6f3513dac3d511e1289c8719a2746dc0c565dd838c04e8cdab59bc6e
4
+ data.tar.gz: 75616f7a40ee931b88768cf27e92d97e38e3c8a31d897567368eeeb5ae0b6957
5
5
  SHA512:
6
- metadata.gz: '03714059152b3d9419893b0e692b889d3db577a8ba257bb7e8e9c0f91794d19afff6f98846332be3d1d188570d6226d4f514813e15bf337b67c659b97a60e582'
7
- data.tar.gz: 25cc4d1e666ca6ef616e2cd4ecf47d983dad8e5f61cdf28470f1d775d2bbeb033b6234d898e37376540d302fbd9373c25ca46d1a778252e2025c325eb53b203e
6
+ metadata.gz: 15c950e3689390a1e7a32f60191aa29a9f351ef8f7c84127552c0cd936bfb3ae858aa79050ab9c418234fa50e2fb2a3b11a95606bdacd041dfd69a6d66b3b949
7
+ data.tar.gz: e02be77c5b4a4e4f61641cc03b65103c2771cc3164caa456ecf6a40fbaa03e627a386265629f8fe77d4c277f0ed969d9220d589c740a3bd4a3e8f532168bf8c8
data/README.md CHANGED
@@ -37,7 +37,9 @@ sanitizer_config = {
37
37
  }
38
38
  sanitizer = Selma::Sanitizer.new(sanitizer_config)
39
39
  rewriter = Selma::Rewriter.new(sanitizer: sanitizer, handlers: [MatchElementRewrite.new, MatchTextRewrite.new])
40
- rewriter(html)
40
+ # removes any element that is not ["b", "em", "i", "strong", "u"];
41
+ # then calls `MatchElementRewrite` and `MatchTextRewrite` on matching HTML elements
42
+ rewriter.rewrite(html)
41
43
  ```
42
44
 
43
45
  Here's a look at each individual part.
data/ext/selma/extconf.rb CHANGED
@@ -3,4 +3,4 @@ require "rb_sys/mkmf"
3
3
 
4
4
  require_relative "_util"
5
5
 
6
- create_rust_makefile("selma")
6
+ create_rust_makefile("selma/selma")
@@ -4,8 +4,39 @@ module Selma
4
4
  class Sanitizer
5
5
  module Config
6
6
  BASIC = freeze_config(
7
- elements: ["a", "abbr", "blockquote", "b", "br", "cite", "code", "dd", "dfn", "dl", "dt", "em", "i", "kbd",
8
- "li", "mark", "ol", "p", "pre", "q", "s", "samp", "small", "strike", "strong", "sub", "sup", "time", "u", "ul", "var",],
7
+ elements: [
8
+ "a",
9
+ "abbr",
10
+ "blockquote",
11
+ "b",
12
+ "br",
13
+ "cite",
14
+ "code",
15
+ "dd",
16
+ "dfn",
17
+ "dl",
18
+ "dt",
19
+ "em",
20
+ "i",
21
+ "kbd",
22
+ "li",
23
+ "mark",
24
+ "ol",
25
+ "p",
26
+ "pre",
27
+ "q",
28
+ "s",
29
+ "samp",
30
+ "small",
31
+ "strike",
32
+ "strong",
33
+ "sub",
34
+ "sup",
35
+ "time",
36
+ "u",
37
+ "ul",
38
+ "var",
39
+ ],
9
40
 
10
41
  attributes: {
11
42
  "a" => ["href"],
@@ -33,13 +33,49 @@ module Selma
33
33
 
34
34
  # An Array of element names whose contents will be removed. The contents
35
35
  # of all other filtered elements will be left behind.
36
- remove_contents: ["iframe", "math", "noembed", "noframes", "noscript", "plaintext", "script", "style", "svg",
37
- "xmp",],
36
+ remove_contents: [
37
+ "iframe",
38
+ "math",
39
+ "noembed",
40
+ "noframes",
41
+ "noscript",
42
+ "plaintext",
43
+ "script",
44
+ "style",
45
+ "svg",
46
+ "xmp",
47
+ ],
38
48
 
39
49
  # Elements which, when removed, should have their contents surrounded by
40
50
  # whitespace.
41
- whitespace_elements: ["address", "article", "aside", "blockquote", "br", "dd", "div", "dl", "dt", "footer",
42
- "h1", "h2", "h3", "h4", "h5", "h6", "header", "hgroup", "hr", "li", "nav", "ol", "p", "pre", "section", "ul",],
51
+ whitespace_elements: [
52
+ "address",
53
+ "article",
54
+ "aside",
55
+ "blockquote",
56
+ "br",
57
+ "dd",
58
+ "div",
59
+ "dl",
60
+ "dt",
61
+ "footer",
62
+ "h1",
63
+ "h2",
64
+ "h3",
65
+ "h4",
66
+ "h5",
67
+ "h6",
68
+ "header",
69
+ "hgroup",
70
+ "hr",
71
+ "li",
72
+ "nav",
73
+ "ol",
74
+ "p",
75
+ "pre",
76
+ "section",
77
+ "ul",
78
+ ],
43
79
  )
44
80
  end
45
81
  end
@@ -4,12 +4,60 @@ module Selma
4
4
  class Sanitizer
5
5
  module Config
6
6
  RELAXED = freeze_config(
7
- elements: BASIC[:elements] + ["address", "article", "aside", "bdi", "bdo", "body", "caption", "col",
8
- "colgroup", "data", "del", "div", "figcaption", "figure", "footer", "h1", "h2", "h3", "h4", "h5", "h6", "head", "header", "hgroup", "hr", "html", "img", "ins", "main", "nav", "rp", "rt", "ruby", "section", "span", "style", "summary", "sup", "table", "tbody", "td", "tfoot", "th", "thead", "title", "tr", "wbr",],
7
+ elements: BASIC[:elements] + [
8
+ "address",
9
+ "article",
10
+ "aside",
11
+ "bdi",
12
+ "bdo",
13
+ "body",
14
+ "caption",
15
+ "col",
16
+ "colgroup",
17
+ "data",
18
+ "del",
19
+ "div",
20
+ "figcaption",
21
+ "figure",
22
+ "footer",
23
+ "h1",
24
+ "h2",
25
+ "h3",
26
+ "h4",
27
+ "h5",
28
+ "h6",
29
+ "head",
30
+ "header",
31
+ "hgroup",
32
+ "hr",
33
+ "html",
34
+ "img",
35
+ "ins",
36
+ "main",
37
+ "nav",
38
+ "rp",
39
+ "rt",
40
+ "ruby",
41
+ "section",
42
+ "span",
43
+ "style",
44
+ "summary",
45
+ "sup",
46
+ "table",
47
+ "tbody",
48
+ "td",
49
+ "tfoot",
50
+ "th",
51
+ "thead",
52
+ "title",
53
+ "tr",
54
+ "wbr",
55
+ ],
9
56
 
10
57
  allow_doctype: true,
11
58
 
12
- attributes: merge(BASIC[:attributes],
59
+ attributes: merge(
60
+ BASIC[:attributes],
13
61
  :all => ["class", "dir", "hidden", "id", "lang", "style", "tabindex", "title", "translate"],
14
62
  "a" => ["href", "hreflang", "name", "rel"],
15
63
  "col" => ["span", "width"],
@@ -21,16 +69,29 @@ module Selma
21
69
  "li" => ["value"],
22
70
  "ol" => ["reversed", "start", "type"],
23
71
  "style" => ["media", "scoped", "type"],
24
- "table" => ["align", "bgcolor", "border", "cellpadding", "cellspacing", "frame", "rules", "sortable",
25
- "summary", "width",],
72
+ "table" => [
73
+ "align",
74
+ "bgcolor",
75
+ "border",
76
+ "cellpadding",
77
+ "cellspacing",
78
+ "frame",
79
+ "rules",
80
+ "sortable",
81
+ "summary",
82
+ "width",
83
+ ],
26
84
  "td" => ["abbr", "align", "axis", "colspan", "headers", "rowspan", "valign", "width"],
27
85
  "th" => ["abbr", "align", "axis", "colspan", "headers", "rowspan", "scope", "sorted", "valign", "width"],
28
- "ul" => ["type"]),
86
+ "ul" => ["type"],
87
+ ),
29
88
 
30
- protocols: merge(BASIC[:protocols],
89
+ protocols: merge(
90
+ BASIC[:protocols],
31
91
  "del" => { "cite" => ["http", "https", :relative] },
32
92
  "img" => { "src" => ["http", "https", :relative] },
33
- "ins" => { "cite" => ["http", "https", :relative] }),
93
+ "ins" => { "cite" => ["http", "https", :relative] },
94
+ ),
34
95
  )
35
96
  end
36
97
  end
data/lib/selma/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Selma
4
- VERSION = "0.0.6"
4
+ VERSION = "0.0.7"
5
5
  end
data/selma.gemspec CHANGED
@@ -24,7 +24,7 @@ Gem::Specification.new do |spec|
24
24
  spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
25
25
 
26
26
  spec.require_paths = ["lib"]
27
- spec.extensions = ["ext/selma/Cargo.toml"]
27
+ spec.extensions = ["ext/selma/extconf.rb"]
28
28
 
29
29
  spec.metadata = {
30
30
  "allowed_push_host" => "https://rubygems.org",
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: selma
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.6
4
+ version: 0.0.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Garen J. Torikian
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-12-28 00:00:00.000000000 Z
11
+ date: 2023-01-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rb_sys
@@ -66,12 +66,12 @@ dependencies:
66
66
  - - "~>"
67
67
  - !ruby/object:Gem::Version
68
68
  version: '1.2'
69
- description:
69
+ description:
70
70
  email:
71
71
  - gjtorikian@gmail.com
72
72
  executables: []
73
73
  extensions:
74
- - ext/selma/Cargo.toml
74
+ - ext/selma/extconf.rb
75
75
  extra_rdoc_files: []
76
76
  files:
77
77
  - LICENSE.txt
@@ -103,7 +103,7 @@ files:
103
103
  - lib/selma/selector.rb
104
104
  - lib/selma/version.rb
105
105
  - selma.gemspec
106
- homepage:
106
+ homepage:
107
107
  licenses:
108
108
  - MIT
109
109
  metadata:
@@ -111,7 +111,7 @@ metadata:
111
111
  funding_uri: https://github.com/sponsors/gjtorikian/
112
112
  source_code_uri: https://github.com/gjtorikian/selma
113
113
  rubygems_mfa_required: 'true'
114
- post_install_message:
114
+ post_install_message:
115
115
  rdoc_options: []
116
116
  require_paths:
117
117
  - lib
@@ -126,8 +126,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
126
126
  - !ruby/object:Gem::Version
127
127
  version: 3.3.22
128
128
  requirements: []
129
- rubygems_version: 3.3.22
130
- signing_key:
129
+ rubygems_version: 3.4.3
130
+ signing_key:
131
131
  specification_version: 4
132
132
  summary: Selma selects and matches HTML nodes using CSS rules. Backed by Rust's lol_html
133
133
  parser.