curation 2.0.1 → 2.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +1 -0
- data/Gemfile.lock +13 -2
- data/curation.gemspec +1 -0
- data/lib/curation/finders/text.rb +1 -1
- data/lib/curation/finders/title.rb +5 -5
- data/lib/curation/version.rb +1 -1
- data/lib/curation.rb +1 -0
- metadata +17 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7385bf2f2c34ab16df36a865f82bb53effca4f13824122d788f22afab6d5f956
|
4
|
+
data.tar.gz: 63d681cb1c06c5aa34caa56deae021ae28292306df00bf85708341ad9006568a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 222924c992b61f62d8347d76a2f167773a83c7b2ac613031a3dc40d17a0fbbe1f1ec8485d4caa127f0bc89133503e4489b989ef1307676f0ef125d555f4b0e6f
|
7
|
+
data.tar.gz: 4668058ec845b325b46dac9a22c7ce6c3047f9d536655f5297d01b63a3bc087fca5dcb6494fd6d796720359e41eb61b572e2117878d4c730d6090dabe171332a
|
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,10 +1,11 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
curation (2.0.
|
4
|
+
curation (2.0.3)
|
5
5
|
htmlentities
|
6
6
|
metainspector
|
7
7
|
nokogiri
|
8
|
+
rails-html-sanitizer
|
8
9
|
|
9
10
|
GEM
|
10
11
|
remote: https://rubygems.org/
|
@@ -15,6 +16,7 @@ GEM
|
|
15
16
|
base64 (0.2.0)
|
16
17
|
builder (3.2.4)
|
17
18
|
byebug (11.1.3)
|
19
|
+
crass (1.0.6)
|
18
20
|
domain_name (0.5.20190701)
|
19
21
|
unf (>= 0.0.5, < 1.0.0)
|
20
22
|
faraday (2.7.11)
|
@@ -40,6 +42,9 @@ GEM
|
|
40
42
|
htmlentities (4.3.4)
|
41
43
|
http-cookie (1.0.5)
|
42
44
|
domain_name (~> 0.5)
|
45
|
+
loofah (2.21.4)
|
46
|
+
crass (~> 1.0.2)
|
47
|
+
nokogiri (>= 1.12.0)
|
43
48
|
metainspector (5.15.0)
|
44
49
|
addressable (~> 2.8.4)
|
45
50
|
faraday (~> 2.5)
|
@@ -52,6 +57,7 @@ GEM
|
|
52
57
|
fastimage (~> 2.2)
|
53
58
|
nesty (~> 1.0)
|
54
59
|
nokogiri (~> 1.13)
|
60
|
+
mini_portile2 (2.8.9)
|
55
61
|
minitest (5.20.0)
|
56
62
|
minitest-reporters (1.6.1)
|
57
63
|
ansi
|
@@ -59,10 +65,14 @@ GEM
|
|
59
65
|
minitest (>= 5.0)
|
60
66
|
ruby-progressbar
|
61
67
|
nesty (1.0.2)
|
62
|
-
nokogiri (1.15.4
|
68
|
+
nokogiri (1.15.4)
|
69
|
+
mini_portile2 (~> 2.8.2)
|
63
70
|
racc (~> 1.4)
|
64
71
|
public_suffix (5.0.3)
|
65
72
|
racc (1.7.3)
|
73
|
+
rails-html-sanitizer (1.6.0)
|
74
|
+
loofah (~> 2.21)
|
75
|
+
nokogiri (~> 1.14)
|
66
76
|
rake (12.3.3)
|
67
77
|
ruby-progressbar (1.13.0)
|
68
78
|
ruby2_keywords (0.0.5)
|
@@ -72,6 +82,7 @@ GEM
|
|
72
82
|
zlib (2.1.1)
|
73
83
|
|
74
84
|
PLATFORMS
|
85
|
+
arm64-darwin-24
|
75
86
|
x86_64-darwin-21
|
76
87
|
x86_64-darwin-22
|
77
88
|
|
data/curation.gemspec
CHANGED
@@ -9,7 +9,7 @@ module Title
|
|
9
9
|
def find_title
|
10
10
|
find_title_with_json_ld ||
|
11
11
|
find_title_with_metainspector ||
|
12
|
-
find_title_with_nokogiri
|
12
|
+
find_title_with_nokogiri ||
|
13
13
|
''
|
14
14
|
end
|
15
15
|
|
@@ -29,11 +29,11 @@ module Title
|
|
29
29
|
metainspector_title = metainspector.title
|
30
30
|
# Problème avec une balise <meta property="title" content="Run 0" />,
|
31
31
|
# metainspector croit que c'est le titre de la page.
|
32
|
-
# Comme le title contient le best title, avec souvent des infos en plus sur le site,
|
32
|
+
# Comme le title contient le best title, avec souvent des infos en plus sur le site,
|
33
33
|
# on vérifie si le best title est bien contenu dans le title
|
34
|
-
if metainspector_title.present? &&
|
34
|
+
if metainspector_title.present? &&
|
35
35
|
metainspector_title.present? &&
|
36
|
-
metainspector_best_title.present? &&
|
36
|
+
metainspector_best_title.present? &&
|
37
37
|
metainspector_title.include?(metainspector_best_title)
|
38
38
|
return metainspector_best_title
|
39
39
|
elsif metainspector_title.present?
|
@@ -55,4 +55,4 @@ module Title
|
|
55
55
|
end
|
56
56
|
end
|
57
57
|
|
58
|
-
end
|
58
|
+
end
|
data/lib/curation/version.rb
CHANGED
data/lib/curation.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: curation
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.
|
4
|
+
version: 2.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Arnaud Levy
|
8
|
-
autorequire:
|
9
8
|
bindir: exe
|
10
9
|
cert_chain: []
|
11
|
-
date:
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
12
11
|
dependencies:
|
13
12
|
- !ruby/object:Gem::Dependency
|
14
13
|
name: metainspector
|
@@ -52,6 +51,20 @@ dependencies:
|
|
52
51
|
- - ">="
|
53
52
|
- !ruby/object:Gem::Version
|
54
53
|
version: '0'
|
54
|
+
- !ruby/object:Gem::Dependency
|
55
|
+
name: rails-html-sanitizer
|
56
|
+
requirement: !ruby/object:Gem::Requirement
|
57
|
+
requirements:
|
58
|
+
- - ">="
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
version: '0'
|
61
|
+
type: :runtime
|
62
|
+
prerelease: false
|
63
|
+
version_requirements: !ruby/object:Gem::Requirement
|
64
|
+
requirements:
|
65
|
+
- - ">="
|
66
|
+
- !ruby/object:Gem::Version
|
67
|
+
version: '0'
|
55
68
|
description: When you build content curation tools, you need to extract the content
|
56
69
|
of pages (title, text, image...). This requires different strategies and some fine
|
57
70
|
tuning to work efficiently.
|
@@ -87,7 +100,6 @@ licenses:
|
|
87
100
|
metadata:
|
88
101
|
homepage_uri: https://github.com/noesya/curation
|
89
102
|
source_code_uri: https://github.com/noesya/curation
|
90
|
-
post_install_message:
|
91
103
|
rdoc_options: []
|
92
104
|
require_paths:
|
93
105
|
- lib
|
@@ -102,8 +114,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
102
114
|
- !ruby/object:Gem::Version
|
103
115
|
version: '0'
|
104
116
|
requirements: []
|
105
|
-
rubygems_version: 3.
|
106
|
-
signing_key:
|
117
|
+
rubygems_version: 3.6.7
|
107
118
|
specification_version: 4
|
108
119
|
summary: Curation of content
|
109
120
|
test_files: []
|