selma 0.0.6-x86_64-linux → 0.1.0-x86_64-linux

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: be9face4692cbc6653b2085a056679aab5eca490517f3fa17e4f53ac5c9b4028
4
- data.tar.gz: 47fa7091498f304b8aba324637218b7ca0af09e8370084337f1e588be677ac3e
3
+ metadata.gz: 8c130b05fe7738da1df29eb9fc125f1e06a08d35d82580163de4d542710d0b83
4
+ data.tar.gz: 15c186ca42e6c9ffdc07427419effa373c3686cefd6d6a33a60888854f8ef3e5
5
5
  SHA512:
6
- metadata.gz: 9e57f2b3f8a82aa92cbc68c17a465226631af6bc1c89150a65e58a0b87d37698418799f812083de22590fdf11a18e1991d847f6273c7d9d03c66fd4139c10cc7
7
- data.tar.gz: 66f079b74f387266446c5293b17dd1a534de422c1b7a7149ff562a4a9a05fefd7857216d2e6a782d92eecb7595ec5e1ba7b2e94c2a56350caef63c2d768a3901
6
+ metadata.gz: d58e7be164d0f66c3a06e94c0243d7f66089818365f6f40e781f211c66aa36345920265ad21be764070d45c468ad0cd2235748da97250c5737e0b95e77bff2fb
7
+ data.tar.gz: cdca69f98b98fc71c6b564adc410993af74d197464bee25ecc21ff7bbe9274c67542354cb6093b29d4661a4f53d48dbdbe14146a49b71e5220735176894d1f54
data/README.md CHANGED
@@ -29,7 +29,7 @@ Selma can perform two different actions, either independently or together:
29
29
  - Sanitize HTML, through a [Sanitize](https://github.com/rgrove/sanitize)-like allowlist syntax; and
30
30
  - Select HTML using CSS rules, and manipulate elements and text nodes along the way.
31
31
 
32
- It does this through two kwargsL `sanitizer` and `handlers`. The basic API for Selma looks like this:
32
+ It does this through two kwargs: `sanitizer` and `handlers`. The basic API for Selma looks like this:
33
33
 
34
34
  ```ruby
35
35
  sanitizer_config = {
@@ -37,7 +37,9 @@ sanitizer_config = {
37
37
  }
38
38
  sanitizer = Selma::Sanitizer.new(sanitizer_config)
39
39
  rewriter = Selma::Rewriter.new(sanitizer: sanitizer, handlers: [MatchElementRewrite.new, MatchTextRewrite.new])
40
- rewriter(html)
40
+ # removes any element that is not ["b", "em", "i", "strong", "u"];
41
+ # then calls `MatchElementRewrite` and `MatchTextRewrite` on matching HTML elements
42
+ rewriter.rewrite(html)
41
43
  ```
42
44
 
43
45
  Here's a look at each individual part.
Binary file
Binary file
@@ -4,8 +4,39 @@ module Selma
4
4
  class Sanitizer
5
5
  module Config
6
6
  BASIC = freeze_config(
7
- elements: ["a", "abbr", "blockquote", "b", "br", "cite", "code", "dd", "dfn", "dl", "dt", "em", "i", "kbd",
8
- "li", "mark", "ol", "p", "pre", "q", "s", "samp", "small", "strike", "strong", "sub", "sup", "time", "u", "ul", "var",],
7
+ elements: [
8
+ "a",
9
+ "abbr",
10
+ "blockquote",
11
+ "b",
12
+ "br",
13
+ "cite",
14
+ "code",
15
+ "dd",
16
+ "dfn",
17
+ "dl",
18
+ "dt",
19
+ "em",
20
+ "i",
21
+ "kbd",
22
+ "li",
23
+ "mark",
24
+ "ol",
25
+ "p",
26
+ "pre",
27
+ "q",
28
+ "s",
29
+ "samp",
30
+ "small",
31
+ "strike",
32
+ "strong",
33
+ "sub",
34
+ "sup",
35
+ "time",
36
+ "u",
37
+ "ul",
38
+ "var",
39
+ ],
9
40
 
10
41
  attributes: {
11
42
  "a" => ["href"],
@@ -33,13 +33,49 @@ module Selma
33
33
 
34
34
  # An Array of element names whose contents will be removed. The contents
35
35
  # of all other filtered elements will be left behind.
36
- remove_contents: ["iframe", "math", "noembed", "noframes", "noscript", "plaintext", "script", "style", "svg",
37
- "xmp",],
36
+ remove_contents: [
37
+ "iframe",
38
+ "math",
39
+ "noembed",
40
+ "noframes",
41
+ "noscript",
42
+ "plaintext",
43
+ "script",
44
+ "style",
45
+ "svg",
46
+ "xmp",
47
+ ],
38
48
 
39
49
  # Elements which, when removed, should have their contents surrounded by
40
50
  # whitespace.
41
- whitespace_elements: ["address", "article", "aside", "blockquote", "br", "dd", "div", "dl", "dt", "footer",
42
- "h1", "h2", "h3", "h4", "h5", "h6", "header", "hgroup", "hr", "li", "nav", "ol", "p", "pre", "section", "ul",],
51
+ whitespace_elements: [
52
+ "address",
53
+ "article",
54
+ "aside",
55
+ "blockquote",
56
+ "br",
57
+ "dd",
58
+ "div",
59
+ "dl",
60
+ "dt",
61
+ "footer",
62
+ "h1",
63
+ "h2",
64
+ "h3",
65
+ "h4",
66
+ "h5",
67
+ "h6",
68
+ "header",
69
+ "hgroup",
70
+ "hr",
71
+ "li",
72
+ "nav",
73
+ "ol",
74
+ "p",
75
+ "pre",
76
+ "section",
77
+ "ul",
78
+ ],
43
79
  )
44
80
  end
45
81
  end
@@ -4,12 +4,60 @@ module Selma
4
4
  class Sanitizer
5
5
  module Config
6
6
  RELAXED = freeze_config(
7
- elements: BASIC[:elements] + ["address", "article", "aside", "bdi", "bdo", "body", "caption", "col",
8
- "colgroup", "data", "del", "div", "figcaption", "figure", "footer", "h1", "h2", "h3", "h4", "h5", "h6", "head", "header", "hgroup", "hr", "html", "img", "ins", "main", "nav", "rp", "rt", "ruby", "section", "span", "style", "summary", "sup", "table", "tbody", "td", "tfoot", "th", "thead", "title", "tr", "wbr",],
7
+ elements: BASIC[:elements] + [
8
+ "address",
9
+ "article",
10
+ "aside",
11
+ "bdi",
12
+ "bdo",
13
+ "body",
14
+ "caption",
15
+ "col",
16
+ "colgroup",
17
+ "data",
18
+ "del",
19
+ "div",
20
+ "figcaption",
21
+ "figure",
22
+ "footer",
23
+ "h1",
24
+ "h2",
25
+ "h3",
26
+ "h4",
27
+ "h5",
28
+ "h6",
29
+ "head",
30
+ "header",
31
+ "hgroup",
32
+ "hr",
33
+ "html",
34
+ "img",
35
+ "ins",
36
+ "main",
37
+ "nav",
38
+ "rp",
39
+ "rt",
40
+ "ruby",
41
+ "section",
42
+ "span",
43
+ "style",
44
+ "summary",
45
+ "sup",
46
+ "table",
47
+ "tbody",
48
+ "td",
49
+ "tfoot",
50
+ "th",
51
+ "thead",
52
+ "title",
53
+ "tr",
54
+ "wbr",
55
+ ],
9
56
 
10
57
  allow_doctype: true,
11
58
 
12
- attributes: merge(BASIC[:attributes],
59
+ attributes: merge(
60
+ BASIC[:attributes],
13
61
  :all => ["class", "dir", "hidden", "id", "lang", "style", "tabindex", "title", "translate"],
14
62
  "a" => ["href", "hreflang", "name", "rel"],
15
63
  "col" => ["span", "width"],
@@ -21,16 +69,29 @@ module Selma
21
69
  "li" => ["value"],
22
70
  "ol" => ["reversed", "start", "type"],
23
71
  "style" => ["media", "scoped", "type"],
24
- "table" => ["align", "bgcolor", "border", "cellpadding", "cellspacing", "frame", "rules", "sortable",
25
- "summary", "width",],
72
+ "table" => [
73
+ "align",
74
+ "bgcolor",
75
+ "border",
76
+ "cellpadding",
77
+ "cellspacing",
78
+ "frame",
79
+ "rules",
80
+ "sortable",
81
+ "summary",
82
+ "width",
83
+ ],
26
84
  "td" => ["abbr", "align", "axis", "colspan", "headers", "rowspan", "valign", "width"],
27
85
  "th" => ["abbr", "align", "axis", "colspan", "headers", "rowspan", "scope", "sorted", "valign", "width"],
28
- "ul" => ["type"]),
86
+ "ul" => ["type"],
87
+ ),
29
88
 
30
- protocols: merge(BASIC[:protocols],
89
+ protocols: merge(
90
+ BASIC[:protocols],
31
91
  "del" => { "cite" => ["http", "https", :relative] },
32
92
  "img" => { "src" => ["http", "https", :relative] },
33
- "ins" => { "cite" => ["http", "https", :relative] }),
93
+ "ins" => { "cite" => ["http", "https", :relative] },
94
+ ),
34
95
  )
35
96
  end
36
97
  end
data/lib/selma/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Selma
4
- VERSION = "0.0.6"
4
+ VERSION = "0.1.0"
5
5
  end
metadata CHANGED
@@ -1,29 +1,15 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: selma
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.6
4
+ version: 0.1.0
5
5
  platform: x86_64-linux
6
6
  authors:
7
7
  - Garen J. Torikian
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-12-28 00:00:00.000000000 Z
11
+ date: 2023-03-29 00:00:00.000000000 Z
12
12
  dependencies:
13
- - !ruby/object:Gem::Dependency
14
- name: rb_sys
15
- requirement: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - "~>"
18
- - !ruby/object:Gem::Version
19
- version: '0.9'
20
- type: :runtime
21
- prerelease: false
22
- version_requirements: !ruby/object:Gem::Requirement
23
- requirements:
24
- - - "~>"
25
- - !ruby/object:Gem::Version
26
- version: '0.9'
27
13
  - !ruby/object:Gem::Dependency
28
14
  name: rake
29
15
  requirement: !ruby/object:Gem::Requirement
@@ -52,21 +38,7 @@ dependencies:
52
38
  - - "~>"
53
39
  - !ruby/object:Gem::Version
54
40
  version: '1.2'
55
- - !ruby/object:Gem::Dependency
56
- name: rake-compiler-dock
57
- requirement: !ruby/object:Gem::Requirement
58
- requirements:
59
- - - "~>"
60
- - !ruby/object:Gem::Version
61
- version: '1.2'
62
- type: :development
63
- prerelease: false
64
- version_requirements: !ruby/object:Gem::Requirement
65
- requirements:
66
- - - "~>"
67
- - !ruby/object:Gem::Version
68
- version: '1.2'
69
- description:
41
+ description:
70
42
  email:
71
43
  - gjtorikian@gmail.com
72
44
  executables: []
@@ -75,22 +47,9 @@ extra_rdoc_files: []
75
47
  files:
76
48
  - LICENSE.txt
77
49
  - README.md
78
- - ext/selma/Cargo.toml
79
- - ext/selma/_util.rb
80
- - ext/selma/extconf.rb
81
- - ext/selma/src/html.rs
82
- - ext/selma/src/html/element.rs
83
- - ext/selma/src/html/end_tag.rs
84
- - ext/selma/src/html/text_chunk.rs
85
- - ext/selma/src/lib.rs
86
- - ext/selma/src/native_ref_wrap.rs
87
- - ext/selma/src/rewriter.rs
88
- - ext/selma/src/sanitizer.rs
89
- - ext/selma/src/selector.rs
90
- - ext/selma/src/tags.rs
91
- - ext/selma/src/wrapped_struct.rs
92
50
  - lib/selma.rb
93
51
  - lib/selma/3.1/selma.so
52
+ - lib/selma/3.2/selma.so
94
53
  - lib/selma/extension.rb
95
54
  - lib/selma/html.rb
96
55
  - lib/selma/rewriter.rb
@@ -102,8 +61,7 @@ files:
102
61
  - lib/selma/sanitizer/config/restricted.rb
103
62
  - lib/selma/selector.rb
104
63
  - lib/selma/version.rb
105
- - selma.gemspec
106
- homepage:
64
+ homepage:
107
65
  licenses:
108
66
  - MIT
109
67
  metadata:
@@ -111,7 +69,7 @@ metadata:
111
69
  funding_uri: https://github.com/sponsors/gjtorikian/
112
70
  source_code_uri: https://github.com/gjtorikian/selma
113
71
  rubygems_mfa_required: 'true'
114
- post_install_message:
72
+ post_install_message:
115
73
  rdoc_options: []
116
74
  require_paths:
117
75
  - lib
@@ -122,15 +80,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
122
80
  version: '3.1'
123
81
  - - "<"
124
82
  - !ruby/object:Gem::Version
125
- version: 3.2.dev
83
+ version: 3.3.dev
126
84
  required_rubygems_version: !ruby/object:Gem::Requirement
127
85
  requirements:
128
86
  - - ">="
129
87
  - !ruby/object:Gem::Version
130
88
  version: 3.3.22
131
89
  requirements: []
132
- rubygems_version: 3.3.22
133
- signing_key:
90
+ rubygems_version: 3.4.4
91
+ signing_key:
134
92
  specification_version: 4
135
93
  summary: Selma selects and matches HTML nodes using CSS rules. Backed by Rust's lol_html
136
94
  parser.
data/ext/selma/Cargo.toml DELETED
@@ -1,14 +0,0 @@
1
- [package]
2
- name = "selma"
3
- version = "1.0.0"
4
- edition = "2021"
5
-
6
- [dependencies]
7
- enum-iterator = "1.2"
8
- escapist = "0.0.2"
9
- magnus = { git = "https://github.com/matsadler/magnus", rev = "23160f7229ac74c42da1b5096a65ccbc40962697" }
10
- lol_html = "0.3"
11
-
12
- [lib]
13
- name = "selma"
14
- crate-type = ["cdylib"]
data/ext/selma/_util.rb DELETED
@@ -1,102 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- RUBY_MAJOR, RUBY_MINOR = RUBY_VERSION.split(".").collect(&:to_i)
4
-
5
- PACKAGE_ROOT_DIR = File.expand_path(File.join(File.dirname(__FILE__), "..", ".."))
6
- PACKAGE_EXT_DIR = File.join(PACKAGE_ROOT_DIR, "ext", "selma")
7
-
8
- OS = case os = RbConfig::CONFIG["host_os"].downcase
9
- when /linux/
10
- # The official ruby-alpine Docker containers pre-build Ruby. As a result,
11
- # Ruby doesn't know that it's on a musl-based platform. `ldd` is the
12
- # a more reliable way to detect musl.
13
- # See https://github.com/skylightio/skylight-ruby/issues/92
14
- if ENV["SKYLIGHT_MUSL"] || %x(ldd --version 2>&1).include?("musl")
15
- "linux-musl"
16
- else
17
- "linux"
18
- end
19
- when /darwin/
20
- "darwin"
21
- when /freebsd/
22
- "freebsd"
23
- when /netbsd/
24
- "netbsd"
25
- when /openbsd/
26
- "openbsd"
27
- when /sunos|solaris/
28
- "solaris"
29
- when /mingw|mswin/
30
- "windows"
31
- else
32
- os
33
- end
34
-
35
- # Normalize the platform CPU
36
- ARCH = case cpu = RbConfig::CONFIG["host_cpu"].downcase
37
- when /amd64|x86_64|x64/
38
- "x86_64"
39
- when /i?86|x86|i86pc/
40
- "x86"
41
- when /ppc|powerpc/
42
- "powerpc"
43
- when /^aarch/
44
- "aarch"
45
- when /^arm/
46
- "arm"
47
- else
48
- cpu
49
- end
50
-
51
- def windows?
52
- OS == "windows"
53
- end
54
-
55
- def solaris?
56
- OS == solaries
57
- end
58
-
59
- def darwin?
60
- OS == "darwin"
61
- end
62
-
63
- def macos?
64
- darwin? || OS == "macos"
65
- end
66
-
67
- def openbsd?
68
- OS == "openbsd"
69
- end
70
-
71
- def aix?
72
- OS == "aix"
73
- end
74
-
75
- def nix?
76
- !(windows? || solaris? || darwin?)
77
- end
78
-
79
- def x86_64?
80
- ARCH == "x86_64"
81
- end
82
-
83
- def x86?
84
- ARCH == "x86"
85
- end
86
-
87
- def abs_path(path)
88
- File.join(PACKAGE_EXT_DIR, path)
89
- end
90
-
91
- def find_header_or_abort(header, *paths)
92
- find_header(header, *paths) || abort("#{header} was expected in `#{paths.join(", ")}`, but it is missing.")
93
- end
94
-
95
- def find_library_or_abort(lib, func, *paths)
96
- find_library(lib, func, *paths) || abort("#{lib} was expected in `#{paths.join(", ")}`, but it is missing.")
97
- end
98
-
99
- def concat_flags(*args)
100
- args.compact.join(" ")
101
- end
102
-
data/ext/selma/extconf.rb DELETED
@@ -1,6 +0,0 @@
1
- require "mkmf"
2
- require "rb_sys/mkmf"
3
-
4
- require_relative "_util"
5
-
6
- create_rust_makefile("selma")