crawler-core 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/crawler/base.rb +0 -13
- data/lib/crawler/core/version.rb +1 -1
- data/lib/crawler/utils.rb +19 -0
- metadata +2 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 841bfe48c75a9057ed3bd94b3a72ccbbbc4682a311f67e21a392e8603438bd50
|
4
|
+
data.tar.gz: 974c6281ab809e755a3efc186ba77661cd142ad6bd881fc7c60fa94ef05b534c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cd2aac5b72b77a35b111b550189595c90ea4b4d799b22835c6a19b347376547aa5908101296fc156d3a31b338233d27fcacb40ae0beffc4dbac0247fcd2b9fd8
|
7
|
+
data.tar.gz: b5fc635ed34010d67c98a2fab48131960e690deb470853cf4bbcde904948bd3832a8da1bcd7dcba070ea80529c90a44f140711325141d0c206426fb9b44fa171
|
data/lib/crawler/base.rb
CHANGED
@@ -15,19 +15,6 @@ module Crawler
|
|
15
15
|
yield self
|
16
16
|
end
|
17
17
|
|
18
|
-
def transliterate(string)
|
19
|
-
ActiveSupport::Inflector.transliterate(string.gsub(/[:\-.,!?]/, ' ').strip.gsub(/\s+/, ' '), nil).downcase
|
20
|
-
end
|
21
|
-
|
22
|
-
def levenshtein_score(string_1, string_2)
|
23
|
-
string_1_transliterated = transliterate(string_1)
|
24
|
-
string_2_transliterated = transliterate(string_2)
|
25
|
-
levenshtein_distance = Levenshtein.distance(string_1_transliterated, string_2_transliterated)
|
26
|
-
max_size = [string_1_transliterated.size, string_2_transliterated.size].max.to_f
|
27
|
-
|
28
|
-
(max_size - levenshtein_distance) / max_size
|
29
|
-
end
|
30
|
-
|
31
18
|
def search(*args)
|
32
19
|
raise NotImplementedError
|
33
20
|
end
|
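data/lib/crawler/core/version.rb
CHANGED
(hunk not captured here; the file summary above lists +1 -1 for this file)
data/lib/crawler/utils.rb
ADDED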
@@ -0,0 +1,19 @@
|
|
1
|
+
require 'active_support/inflector'
|
2
|
+
require 'levenshtein-ffi'
|
3
|
+
|
4
|
+
module Crawler
|
5
|
+
module Utils
|
6
|
+
def self.transliterate(string)
|
7
|
+
ActiveSupport::Inflector.transliterate(string.gsub(/[:\-.,!?]/, ' ').strip.gsub(/\s+/, ' '), nil).downcase
|
8
|
+
end
|
9
|
+
|
10
|
+
def self.levenshtein_score(string_1, string_2)
|
11
|
+
string_1_transliterated = transliterate(string_1)
|
12
|
+
string_2_transliterated = transliterate(string_2)
|
13
|
+
levenshtein_distance = Levenshtein.distance(string_1_transliterated, string_2_transliterated)
|
14
|
+
max_size = [string_1_transliterated.size, string_2_transliterated.size].max.to_f
|
15
|
+
|
16
|
+
(max_size - levenshtein_distance) / max_size
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: crawler-core
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jonathan PHILIPPE
|
@@ -84,6 +84,7 @@ files:
|
|
84
84
|
- crawler-core.gemspec
|
85
85
|
- lib/crawler/base.rb
|
86
86
|
- lib/crawler/core/version.rb
|
87
|
+
- lib/crawler/utils.rb
|
87
88
|
homepage: https://crawler.cinema.paris
|
88
89
|
licenses:
|
89
90
|
- CC-BY-SA-4.0
|