curation 2.0.1 → 2.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c2062c7ec7fb444d27f102d26658b386a01a964ea4e03aa0d81a472316012d11
4
- data.tar.gz: 634f5216e61801b3ac42c8b340d5012c101024f4d5b5e0ba155c01a89f38cafb
3
+ metadata.gz: f4c4661f04dfa6c3442537bf91ded409452311936ec3e2dedce8fa48ac6ba5d1
4
+ data.tar.gz: d48f91295be205e59d3028907f1f6dfa9e0d83bc5322834b4b8b21ce824e62a2
5
5
  SHA512:
6
- metadata.gz: 15008b92c6a51fdf9bd79b9f1da01d59323c13589b6e8ccd5d8754b96153173fe8a2cdffa5faa45a4d38c92b097a35012f8dff98fea6244ade8c9af22c13d1cc
7
- data.tar.gz: 96a29d3c8482fce0101f91a3f24eafaec51a9115f2169831b67939edf79a47d7f5bb0c3223466de355cf8a367143a262cc54a1d5e4dd7e6bffe2d310879a9cae
6
+ metadata.gz: dbcc3db9b67f0b91773a1e72626d17c9e3ded02054f6b348bc5264005c09518d568e851f080e780316e32455941ee92146daa35c61c6fc3866806d698a949672
7
+ data.tar.gz: 0c03a44f6f97cf5f0180db1832989c9e0eadd0e7250d3f44a5c4cd6a10d9f42d437081a53449fe40a2c63b4e4b398eedc763fd29f7e3173bab3ee10fd7ca157a
data/Gemfile CHANGED
@@ -7,3 +7,4 @@ gem 'rake', '~> 12.0'
7
7
  gem 'minitest'
8
8
  gem 'minitest-reporters'
9
9
  gem 'byebug'
10
+
data/Gemfile.lock CHANGED
@@ -1,10 +1,11 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- curation (2.0.1)
4
+ curation (2.0.2)
5
5
  htmlentities
6
6
  metainspector
7
7
  nokogiri
8
+ rails-html-sanitizer
8
9
 
9
10
  GEM
10
11
  remote: https://rubygems.org/
@@ -15,6 +16,7 @@ GEM
15
16
  base64 (0.2.0)
16
17
  builder (3.2.4)
17
18
  byebug (11.1.3)
19
+ crass (1.0.6)
18
20
  domain_name (0.5.20190701)
19
21
  unf (>= 0.0.5, < 1.0.0)
20
22
  faraday (2.7.11)
@@ -40,6 +42,9 @@ GEM
40
42
  htmlentities (4.3.4)
41
43
  http-cookie (1.0.5)
42
44
  domain_name (~> 0.5)
45
+ loofah (2.21.4)
46
+ crass (~> 1.0.2)
47
+ nokogiri (>= 1.12.0)
43
48
  metainspector (5.15.0)
44
49
  addressable (~> 2.8.4)
45
50
  faraday (~> 2.5)
@@ -63,6 +68,9 @@ GEM
63
68
  racc (~> 1.4)
64
69
  public_suffix (5.0.3)
65
70
  racc (1.7.3)
71
+ rails-html-sanitizer (1.6.0)
72
+ loofah (~> 2.21)
73
+ nokogiri (~> 1.14)
66
74
  rake (12.3.3)
67
75
  ruby-progressbar (1.13.0)
68
76
  ruby2_keywords (0.0.5)
data/curation.gemspec CHANGED
@@ -24,4 +24,5 @@ Gem::Specification.new do |spec|
24
24
  spec.add_dependency "metainspector"
25
25
  spec.add_dependency "nokogiri"
26
26
  spec.add_dependency "htmlentities"
27
+ spec.add_dependency "rails-html-sanitizer"
27
28
  end
@@ -26,8 +26,8 @@ module Text
26
26
  text = text.gsub('<br><br>', '<br>')
27
27
  text = text.gsub(/\s+/, ' ')
28
28
  text = clean_encoding(text)
29
+ text = Rails::HTML5::FullSanitizer.new.sanitize(text)
29
30
  text
30
-
31
31
  end
32
32
 
33
33
  def find_text
@@ -1,3 +1,3 @@
1
1
  module Curation
2
- VERSION = "2.0.1"
2
+ VERSION = "2.0.2"
3
3
  end
data/lib/curation.rb CHANGED
@@ -10,6 +10,7 @@ require "curation/finders/title"
10
10
  require "metainspector"
11
11
  require "open-uri"
12
12
  require "htmlentities"
13
+ require "rails-html-sanitizer"
13
14
 
14
15
  module Curation
15
16
  class Error < StandardError; end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: curation
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.1
4
+ version: 2.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Arnaud Levy
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-11-11 00:00:00.000000000 Z
11
+ date: 2023-11-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: metainspector
@@ -52,6 +52,20 @@ dependencies:
52
52
  - - ">="
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rails-html-sanitizer
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
55
69
  description: When you build content curation tools, you need to extract the content
56
70
  of pages (title, text, image...). This requires different strategies and some fine
57
71
  tuning to work efficiently.