selma 0.0.6 → 0.0.7

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 92fdde974708c332244cc1617d24a07b286588592efd1f17c78016d4d580cbba
4
- data.tar.gz: b69f34c2509f040b28e25cfbb94ba6d607f6603b2e53853e657b05b29bb0c06e
3
+ metadata.gz: 9d12ee9e6f3513dac3d511e1289c8719a2746dc0c565dd838c04e8cdab59bc6e
4
+ data.tar.gz: 75616f7a40ee931b88768cf27e92d97e38e3c8a31d897567368eeeb5ae0b6957
5
5
  SHA512:
6
- metadata.gz: '03714059152b3d9419893b0e692b889d3db577a8ba257bb7e8e9c0f91794d19afff6f98846332be3d1d188570d6226d4f514813e15bf337b67c659b97a60e582'
7
- data.tar.gz: 25cc4d1e666ca6ef616e2cd4ecf47d983dad8e5f61cdf28470f1d775d2bbeb033b6234d898e37376540d302fbd9373c25ca46d1a778252e2025c325eb53b203e
6
+ metadata.gz: 15c950e3689390a1e7a32f60191aa29a9f351ef8f7c84127552c0cd936bfb3ae858aa79050ab9c418234fa50e2fb2a3b11a95606bdacd041dfd69a6d66b3b949
7
+ data.tar.gz: e02be77c5b4a4e4f61641cc03b65103c2771cc3164caa456ecf6a40fbaa03e627a386265629f8fe77d4c277f0ed969d9220d589c740a3bd4a3e8f532168bf8c8
data/README.md CHANGED
@@ -37,7 +37,9 @@ sanitizer_config = {
37
37
  }
38
38
  sanitizer = Selma::Sanitizer.new(sanitizer_config)
39
39
  rewriter = Selma::Rewriter.new(sanitizer: sanitizer, handlers: [MatchElementRewrite.new, MatchTextRewrite.new])
40
- rewriter(html)
40
+ # removes any element that is not ["b", "em", "i", "strong", "u"];
41
+ # then calls `MatchElementRewrite` and `MatchTextRewrite` on matching HTML elements
42
+ rewriter.rewrite(html)
41
43
  ```
42
44
 
43
45
  Here's a look at each individual part.
data/ext/selma/extconf.rb CHANGED
@@ -3,4 +3,4 @@ require "rb_sys/mkmf"
3
3
 
4
4
  require_relative "_util"
5
5
 
6
- create_rust_makefile("selma")
6
+ create_rust_makefile("selma/selma")
@@ -4,8 +4,39 @@ module Selma
4
4
  class Sanitizer
5
5
  module Config
6
6
  BASIC = freeze_config(
7
- elements: ["a", "abbr", "blockquote", "b", "br", "cite", "code", "dd", "dfn", "dl", "dt", "em", "i", "kbd",
8
- "li", "mark", "ol", "p", "pre", "q", "s", "samp", "small", "strike", "strong", "sub", "sup", "time", "u", "ul", "var",],
7
+ elements: [
8
+ "a",
9
+ "abbr",
10
+ "blockquote",
11
+ "b",
12
+ "br",
13
+ "cite",
14
+ "code",
15
+ "dd",
16
+ "dfn",
17
+ "dl",
18
+ "dt",
19
+ "em",
20
+ "i",
21
+ "kbd",
22
+ "li",
23
+ "mark",
24
+ "ol",
25
+ "p",
26
+ "pre",
27
+ "q",
28
+ "s",
29
+ "samp",
30
+ "small",
31
+ "strike",
32
+ "strong",
33
+ "sub",
34
+ "sup",
35
+ "time",
36
+ "u",
37
+ "ul",
38
+ "var",
39
+ ],
9
40
 
10
41
  attributes: {
11
42
  "a" => ["href"],
@@ -33,13 +33,49 @@ module Selma
33
33
 
34
34
  # An Array of element names whose contents will be removed. The contents
35
35
  # of all other filtered elements will be left behind.
36
- remove_contents: ["iframe", "math", "noembed", "noframes", "noscript", "plaintext", "script", "style", "svg",
37
- "xmp",],
36
+ remove_contents: [
37
+ "iframe",
38
+ "math",
39
+ "noembed",
40
+ "noframes",
41
+ "noscript",
42
+ "plaintext",
43
+ "script",
44
+ "style",
45
+ "svg",
46
+ "xmp",
47
+ ],
38
48
 
39
49
  # Elements which, when removed, should have their contents surrounded by
40
50
  # whitespace.
41
- whitespace_elements: ["address", "article", "aside", "blockquote", "br", "dd", "div", "dl", "dt", "footer",
42
- "h1", "h2", "h3", "h4", "h5", "h6", "header", "hgroup", "hr", "li", "nav", "ol", "p", "pre", "section", "ul",],
51
+ whitespace_elements: [
52
+ "address",
53
+ "article",
54
+ "aside",
55
+ "blockquote",
56
+ "br",
57
+ "dd",
58
+ "div",
59
+ "dl",
60
+ "dt",
61
+ "footer",
62
+ "h1",
63
+ "h2",
64
+ "h3",
65
+ "h4",
66
+ "h5",
67
+ "h6",
68
+ "header",
69
+ "hgroup",
70
+ "hr",
71
+ "li",
72
+ "nav",
73
+ "ol",
74
+ "p",
75
+ "pre",
76
+ "section",
77
+ "ul",
78
+ ],
43
79
  )
44
80
  end
45
81
  end
@@ -4,12 +4,60 @@ module Selma
4
4
  class Sanitizer
5
5
  module Config
6
6
  RELAXED = freeze_config(
7
- elements: BASIC[:elements] + ["address", "article", "aside", "bdi", "bdo", "body", "caption", "col",
8
- "colgroup", "data", "del", "div", "figcaption", "figure", "footer", "h1", "h2", "h3", "h4", "h5", "h6", "head", "header", "hgroup", "hr", "html", "img", "ins", "main", "nav", "rp", "rt", "ruby", "section", "span", "style", "summary", "sup", "table", "tbody", "td", "tfoot", "th", "thead", "title", "tr", "wbr",],
7
+ elements: BASIC[:elements] + [
8
+ "address",
9
+ "article",
10
+ "aside",
11
+ "bdi",
12
+ "bdo",
13
+ "body",
14
+ "caption",
15
+ "col",
16
+ "colgroup",
17
+ "data",
18
+ "del",
19
+ "div",
20
+ "figcaption",
21
+ "figure",
22
+ "footer",
23
+ "h1",
24
+ "h2",
25
+ "h3",
26
+ "h4",
27
+ "h5",
28
+ "h6",
29
+ "head",
30
+ "header",
31
+ "hgroup",
32
+ "hr",
33
+ "html",
34
+ "img",
35
+ "ins",
36
+ "main",
37
+ "nav",
38
+ "rp",
39
+ "rt",
40
+ "ruby",
41
+ "section",
42
+ "span",
43
+ "style",
44
+ "summary",
45
+ "sup",
46
+ "table",
47
+ "tbody",
48
+ "td",
49
+ "tfoot",
50
+ "th",
51
+ "thead",
52
+ "title",
53
+ "tr",
54
+ "wbr",
55
+ ],
9
56
 
10
57
  allow_doctype: true,
11
58
 
12
- attributes: merge(BASIC[:attributes],
59
+ attributes: merge(
60
+ BASIC[:attributes],
13
61
  :all => ["class", "dir", "hidden", "id", "lang", "style", "tabindex", "title", "translate"],
14
62
  "a" => ["href", "hreflang", "name", "rel"],
15
63
  "col" => ["span", "width"],
@@ -21,16 +69,29 @@ module Selma
21
69
  "li" => ["value"],
22
70
  "ol" => ["reversed", "start", "type"],
23
71
  "style" => ["media", "scoped", "type"],
24
- "table" => ["align", "bgcolor", "border", "cellpadding", "cellspacing", "frame", "rules", "sortable",
25
- "summary", "width",],
72
+ "table" => [
73
+ "align",
74
+ "bgcolor",
75
+ "border",
76
+ "cellpadding",
77
+ "cellspacing",
78
+ "frame",
79
+ "rules",
80
+ "sortable",
81
+ "summary",
82
+ "width",
83
+ ],
26
84
  "td" => ["abbr", "align", "axis", "colspan", "headers", "rowspan", "valign", "width"],
27
85
  "th" => ["abbr", "align", "axis", "colspan", "headers", "rowspan", "scope", "sorted", "valign", "width"],
28
- "ul" => ["type"]),
86
+ "ul" => ["type"],
87
+ ),
29
88
 
30
- protocols: merge(BASIC[:protocols],
89
+ protocols: merge(
90
+ BASIC[:protocols],
31
91
  "del" => { "cite" => ["http", "https", :relative] },
32
92
  "img" => { "src" => ["http", "https", :relative] },
33
- "ins" => { "cite" => ["http", "https", :relative] }),
93
+ "ins" => { "cite" => ["http", "https", :relative] },
94
+ ),
34
95
  )
35
96
  end
36
97
  end
data/lib/selma/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Selma
4
- VERSION = "0.0.6"
4
+ VERSION = "0.0.7"
5
5
  end
data/selma.gemspec CHANGED
@@ -24,7 +24,7 @@ Gem::Specification.new do |spec|
24
24
  spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
25
25
 
26
26
  spec.require_paths = ["lib"]
27
- spec.extensions = ["ext/selma/Cargo.toml"]
27
+ spec.extensions = ["ext/selma/extconf.rb"]
28
28
 
29
29
  spec.metadata = {
30
30
  "allowed_push_host" => "https://rubygems.org",
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: selma
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.6
4
+ version: 0.0.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Garen J. Torikian
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-12-28 00:00:00.000000000 Z
11
+ date: 2023-01-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rb_sys
@@ -66,12 +66,12 @@ dependencies:
66
66
  - - "~>"
67
67
  - !ruby/object:Gem::Version
68
68
  version: '1.2'
69
- description:
69
+ description:
70
70
  email:
71
71
  - gjtorikian@gmail.com
72
72
  executables: []
73
73
  extensions:
74
- - ext/selma/Cargo.toml
74
+ - ext/selma/extconf.rb
75
75
  extra_rdoc_files: []
76
76
  files:
77
77
  - LICENSE.txt
@@ -103,7 +103,7 @@ files:
103
103
  - lib/selma/selector.rb
104
104
  - lib/selma/version.rb
105
105
  - selma.gemspec
106
- homepage:
106
+ homepage:
107
107
  licenses:
108
108
  - MIT
109
109
  metadata:
@@ -111,7 +111,7 @@ metadata:
111
111
  funding_uri: https://github.com/sponsors/gjtorikian/
112
112
  source_code_uri: https://github.com/gjtorikian/selma
113
113
  rubygems_mfa_required: 'true'
114
- post_install_message:
114
+ post_install_message:
115
115
  rdoc_options: []
116
116
  require_paths:
117
117
  - lib
@@ -126,8 +126,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
126
126
  - !ruby/object:Gem::Version
127
127
  version: 3.3.22
128
128
  requirements: []
129
- rubygems_version: 3.3.22
130
- signing_key:
129
+ rubygems_version: 3.4.3
130
+ signing_key:
131
131
  specification_version: 4
132
132
  summary: Selma selects and matches HTML nodes using CSS rules. Backed by Rust's lol_html
133
133
  parser.