crawler-core 0.1.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/bin/console +0 -1
- data/crawler-core.gemspec +4 -2
- data/lib/crawler/api.rb +22 -0
- data/lib/crawler/base.rb +3 -27
- data/lib/crawler/configuration.rb +27 -0
- data/lib/crawler/core/version.rb +1 -1
- data/lib/crawler/utils.rb +19 -0
- metadata +53 -10
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 65170a04c894aafa51b513c1a99181f4dd247e435e56084a68a23796022a5d93
|
|
4
|
+
data.tar.gz: 153b6205377a788084f93e89bb7dc93088d10ce1c36d413cabbc29d78599a962
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 2e8b2bca63982400ec7648660a5a6ea3e04649cbbdbf6014f4ae50c54c4ad4f08faffc7fde6d4e3b6c7c345a48c778485d474d15248f4991ecdb631fd3df0592
|
|
7
|
+
data.tar.gz: 6ac9f91d23b315937175044e3c9e5d394ca5fd77e7954293a669049e2b7fc9e9271996d061098c28f74af5ba17ea4168f3fe83a6bc33a780d5f6c43e808df3e5
|
data/.gitignore
CHANGED
data/bin/console
CHANGED
data/crawler-core.gemspec
CHANGED
|
@@ -26,8 +26,10 @@ Gem::Specification.new do |spec|
|
|
|
26
26
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
|
27
27
|
spec.require_paths = ['lib']
|
|
28
28
|
|
|
29
|
-
spec.add_development_dependency 'bundler', '~> 1.
|
|
30
|
-
spec.add_development_dependency 'rake', '~>
|
|
29
|
+
spec.add_development_dependency 'bundler', '~> 2.1', '>= 2.1.4'
|
|
30
|
+
spec.add_development_dependency 'rake', '~> 13.0', '>= 13.0.1'
|
|
31
31
|
spec.add_runtime_dependency 'activesupport', '>= 3.0'
|
|
32
32
|
spec.add_runtime_dependency 'levenshtein-ffi', '>= 1.0'
|
|
33
|
+
spec.add_runtime_dependency 'faraday', '>= 1.0'
|
|
34
|
+
spec.add_runtime_dependency 'faraday_middleware', '>= 1.0'
|
|
33
35
|
end
|
data/lib/crawler/api.rb
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
require 'active_support/concern'
|
|
2
|
+
require 'active_support/inflector'
|
|
3
|
+
require 'faraday'
|
|
4
|
+
|
|
5
|
+
module Crawler
|
|
6
|
+
module Api
|
|
7
|
+
extend ActiveSupport::Concern
|
|
8
|
+
|
|
9
|
+
class_methods do
|
|
10
|
+
def connection(**opts)
|
|
11
|
+
url = self.const_defined?(:API_URL) ? self::API_URL : nil
|
|
12
|
+
user_agent = opts.key?(:user_agent) ? opts[:user_agent] : 'Mozilla/5.0 (Crawler/1.0; +https://cinema.paris)'
|
|
13
|
+
|
|
14
|
+
Faraday.new(url: url, headers: { user_agent: user_agent }) do |faraday|
|
|
15
|
+
faraday.request :url_encoded
|
|
16
|
+
faraday.response :json, content_type: /\bjson$/
|
|
17
|
+
faraday.adapter Faraday.default_adapter
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
end
|
data/lib/crawler/base.rb
CHANGED
|
@@ -1,38 +1,14 @@
|
|
|
1
1
|
require 'active_support/concern'
|
|
2
2
|
require 'active_support/inflector'
|
|
3
|
-
require '
|
|
3
|
+
require 'crawler/configuration'
|
|
4
4
|
|
|
5
5
|
module Crawler
|
|
6
6
|
module Base
|
|
7
7
|
extend ActiveSupport::Concern
|
|
8
|
+
include Configuration
|
|
8
9
|
|
|
9
10
|
class_methods do
|
|
10
|
-
def add_provider(
|
|
11
|
-
raise NotImplementedError
|
|
12
|
-
end
|
|
13
|
-
|
|
14
|
-
def configure
|
|
15
|
-
yield self
|
|
16
|
-
end
|
|
17
|
-
|
|
18
|
-
def transliterate(string)
|
|
19
|
-
ActiveSupport::Inflector.transliterate(string.gsub(/[:\-.,!?]/, ' ').strip.gsub(/\s+/, ' '), nil).downcase
|
|
20
|
-
end
|
|
21
|
-
|
|
22
|
-
def levenshtein_score(string_1, string_2)
|
|
23
|
-
string_1_transliterated = transliterate(string_1)
|
|
24
|
-
string_2_transliterated = transliterate(string_2)
|
|
25
|
-
levenshtein_distance = Levenshtein.distance(string_1_transliterated, string_2_transliterated)
|
|
26
|
-
max_size = [string_1_transliterated.size, string_2_transliterated.size].max.to_f
|
|
27
|
-
|
|
28
|
-
(max_size - levenshtein_distance) / max_size
|
|
29
|
-
end
|
|
30
|
-
|
|
31
|
-
def search(*args)
|
|
32
|
-
raise NotImplementedError
|
|
33
|
-
end
|
|
34
|
-
|
|
35
|
-
def best(*args)
|
|
11
|
+
def add_provider(_provider_name, _options = {})
|
|
36
12
|
raise NotImplementedError
|
|
37
13
|
end
|
|
38
14
|
end
|
|
data/lib/crawler/configuration.rb
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
require 'active_support/concern'
|
|
2
|
+
require 'active_support/inflector'
|
|
3
|
+
require 'singleton'
|
|
4
|
+
|
|
5
|
+
module Crawler
|
|
6
|
+
module Configuration
|
|
7
|
+
extend ActiveSupport::Concern
|
|
8
|
+
|
|
9
|
+
included do
|
|
10
|
+
module_eval <<-METHODS, __FILE__, __LINE__ + 1
|
|
11
|
+
class Configuration
|
|
12
|
+
include Singleton
|
|
13
|
+
end
|
|
14
|
+
METHODS
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
class_methods do
|
|
18
|
+
def config
|
|
19
|
+
self::Configuration.instance
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
def configure
|
|
23
|
+
yield config
|
|
24
|
+
end
|
|
25
|
+
end
|
|
26
|
+
end
|
|
27
|
+
end
|
data/lib/crawler/core/version.rb
CHANGED
data/lib/crawler/utils.rb
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
require 'active_support/inflector'
|
|
2
|
+
require 'levenshtein-ffi'
|
|
3
|
+
|
|
4
|
+
module Crawler
|
|
5
|
+
module Utils
|
|
6
|
+
def self.transliterate(string)
|
|
7
|
+
ActiveSupport::Inflector.transliterate(string.gsub(/[:\-.,!?]/, ' ').strip.gsub(/\s+/, ' '), nil).downcase
|
|
8
|
+
end
|
|
9
|
+
|
|
10
|
+
def self.levenshtein_score(string_1, string_2)
|
|
11
|
+
string_1_transliterated = transliterate(string_1)
|
|
12
|
+
string_2_transliterated = transliterate(string_2)
|
|
13
|
+
levenshtein_distance = Levenshtein.distance(string_1_transliterated, string_2_transliterated)
|
|
14
|
+
max_size = [string_1_transliterated.size, string_2_transliterated.size].max.to_f
|
|
15
|
+
|
|
16
|
+
(max_size - levenshtein_distance) / max_size
|
|
17
|
+
end
|
|
18
|
+
end
|
|
19
|
+
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: crawler-core
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version:
|
|
4
|
+
version: 1.1.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Jonathan PHILIPPE
|
|
8
|
-
autorequire:
|
|
8
|
+
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
11
|
+
date: 2022-05-15 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: bundler
|
|
@@ -16,28 +16,40 @@ dependencies:
|
|
|
16
16
|
requirements:
|
|
17
17
|
- - "~>"
|
|
18
18
|
- !ruby/object:Gem::Version
|
|
19
|
-
version: '1
|
|
19
|
+
version: '2.1'
|
|
20
|
+
- - ">="
|
|
21
|
+
- !ruby/object:Gem::Version
|
|
22
|
+
version: 2.1.4
|
|
20
23
|
type: :development
|
|
21
24
|
prerelease: false
|
|
22
25
|
version_requirements: !ruby/object:Gem::Requirement
|
|
23
26
|
requirements:
|
|
24
27
|
- - "~>"
|
|
25
28
|
- !ruby/object:Gem::Version
|
|
26
|
-
version: '1
|
|
29
|
+
version: '2.1'
|
|
30
|
+
- - ">="
|
|
31
|
+
- !ruby/object:Gem::Version
|
|
32
|
+
version: 2.1.4
|
|
27
33
|
- !ruby/object:Gem::Dependency
|
|
28
34
|
name: rake
|
|
29
35
|
requirement: !ruby/object:Gem::Requirement
|
|
30
36
|
requirements:
|
|
31
37
|
- - "~>"
|
|
32
38
|
- !ruby/object:Gem::Version
|
|
33
|
-
version: '
|
|
39
|
+
version: '13.0'
|
|
40
|
+
- - ">="
|
|
41
|
+
- !ruby/object:Gem::Version
|
|
42
|
+
version: 13.0.1
|
|
34
43
|
type: :development
|
|
35
44
|
prerelease: false
|
|
36
45
|
version_requirements: !ruby/object:Gem::Requirement
|
|
37
46
|
requirements:
|
|
38
47
|
- - "~>"
|
|
39
48
|
- !ruby/object:Gem::Version
|
|
40
|
-
version: '
|
|
49
|
+
version: '13.0'
|
|
50
|
+
- - ">="
|
|
51
|
+
- !ruby/object:Gem::Version
|
|
52
|
+
version: 13.0.1
|
|
41
53
|
- !ruby/object:Gem::Dependency
|
|
42
54
|
name: activesupport
|
|
43
55
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -66,6 +78,34 @@ dependencies:
|
|
|
66
78
|
- - ">="
|
|
67
79
|
- !ruby/object:Gem::Version
|
|
68
80
|
version: '1.0'
|
|
81
|
+
- !ruby/object:Gem::Dependency
|
|
82
|
+
name: faraday
|
|
83
|
+
requirement: !ruby/object:Gem::Requirement
|
|
84
|
+
requirements:
|
|
85
|
+
- - ">="
|
|
86
|
+
- !ruby/object:Gem::Version
|
|
87
|
+
version: '1.0'
|
|
88
|
+
type: :runtime
|
|
89
|
+
prerelease: false
|
|
90
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
91
|
+
requirements:
|
|
92
|
+
- - ">="
|
|
93
|
+
- !ruby/object:Gem::Version
|
|
94
|
+
version: '1.0'
|
|
95
|
+
- !ruby/object:Gem::Dependency
|
|
96
|
+
name: faraday_middleware
|
|
97
|
+
requirement: !ruby/object:Gem::Requirement
|
|
98
|
+
requirements:
|
|
99
|
+
- - ">="
|
|
100
|
+
- !ruby/object:Gem::Version
|
|
101
|
+
version: '1.0'
|
|
102
|
+
type: :runtime
|
|
103
|
+
prerelease: false
|
|
104
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
105
|
+
requirements:
|
|
106
|
+
- - ">="
|
|
107
|
+
- !ruby/object:Gem::Version
|
|
108
|
+
version: '1.0'
|
|
69
109
|
description: ''
|
|
70
110
|
email:
|
|
71
111
|
- jonathan@cinema.paris
|
|
@@ -82,8 +122,11 @@ files:
|
|
|
82
122
|
- bin/console
|
|
83
123
|
- bin/setup
|
|
84
124
|
- crawler-core.gemspec
|
|
125
|
+
- lib/crawler/api.rb
|
|
85
126
|
- lib/crawler/base.rb
|
|
127
|
+
- lib/crawler/configuration.rb
|
|
86
128
|
- lib/crawler/core/version.rb
|
|
129
|
+
- lib/crawler/utils.rb
|
|
87
130
|
homepage: https://crawler.cinema.paris
|
|
88
131
|
licenses:
|
|
89
132
|
- CC-BY-SA-4.0
|
|
@@ -91,7 +134,7 @@ metadata:
|
|
|
91
134
|
homepage_uri: https://crawler.cinema.paris
|
|
92
135
|
source_code_uri: https://github.com/cinema-paris/crawler-core
|
|
93
136
|
changelog_uri: https://github.com/cinema-paris/crawler-core/CHANGELOG.md
|
|
94
|
-
post_install_message:
|
|
137
|
+
post_install_message:
|
|
95
138
|
rdoc_options: []
|
|
96
139
|
require_paths:
|
|
97
140
|
- lib
|
|
@@ -106,8 +149,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
106
149
|
- !ruby/object:Gem::Version
|
|
107
150
|
version: '0'
|
|
108
151
|
requirements: []
|
|
109
|
-
rubygems_version: 3.
|
|
110
|
-
signing_key:
|
|
152
|
+
rubygems_version: 3.3.3
|
|
153
|
+
signing_key:
|
|
111
154
|
specification_version: 4
|
|
112
155
|
summary: ''
|
|
113
156
|
test_files: []
|