rumors-api-client 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 83f729f071e745c9af334ed82e1f36920dd4cf87a1e6372298aa240d25d178da
|
4
|
+
data.tar.gz: ffe2a61891125cfaece0f80c3bd84f0e151a129dfb48e1f8e7a73854c5f39f19
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8a934bdb885d196c884792db0778a1382e770b083f21394aebb93ac6c82add6651caf591ea975827cbd6081fe875de36effd8e2b2a20f2a2b93664f81f15f4ed
|
7
|
+
data.tar.gz: 4235fd57b928d7fb535d88d754b37a6e8b7d21aebece396ac4655e8ce79bc9119f99c70d9c8ae64b38079926f2ac6b42468f151e35f2be414c729eb90937d67e
|
@@ -6,7 +6,8 @@ module Rumors
|
|
6
6
|
SIMILARITY = 0.8
|
7
7
|
|
8
8
|
def initialize(text)
|
9
|
-
@text = text.
|
9
|
+
@text = text.strip
|
10
|
+
@urls = URI.extract(@text).map { |url| URI.parse(URI.escape(url)) }
|
10
11
|
end
|
11
12
|
|
12
13
|
def search
|
@@ -25,10 +26,18 @@ module Rumors
|
|
25
26
|
|
26
27
|
def return_article
|
27
28
|
contents = parse_content
|
28
|
-
|
29
|
-
|
29
|
+
return if contents.nil? || contents.empty?
|
30
|
+
article_id = nil
|
31
|
+
|
32
|
+
if @urls.any?
|
33
|
+
article_id = compare_urls(contents)
|
34
|
+
else
|
35
|
+
most_like = calculate_similarity(contents)
|
36
|
+
return unless most_like[:score] > SIMILARITY
|
37
|
+
article_id = most_like[:article_id]
|
38
|
+
end
|
30
39
|
|
31
|
-
find_article(
|
40
|
+
find_article(article_id) if article_id
|
32
41
|
end
|
33
42
|
|
34
43
|
def find_article(article_id)
|
@@ -36,13 +45,36 @@ module Rumors
|
|
36
45
|
end
|
37
46
|
|
38
47
|
def parse_content
|
48
|
+
# [{ 'article_id' => TfIdfSimilarity::Document(text), 'urls' => ["url"] }]
|
39
49
|
parsed_articles = JSON.parse(@articles.body)
|
40
50
|
parsed_articles['data']['ListArticles']['edges'].map do |article|
|
41
51
|
node = article['node']
|
42
|
-
Hash[node['id'], TfIdfSimilarity::Document.new(node['text'])]
|
52
|
+
content = Hash[node['id'], TfIdfSimilarity::Document.new(node['text'])]
|
53
|
+
content['urls'] = node['hyperlinks'].nil? ? nil : node["hyperlinks"].map { |link| URI.parse(URI.escape(link["url"])) }
|
54
|
+
content
|
43
55
|
end
|
44
56
|
end
|
45
57
|
|
58
|
+
def compare_urls(contents)
|
59
|
+
contents.each do |content|
|
60
|
+
return content.keys.first if exist_same_url?(content['urls'])
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
def exist_same_url?(response_urls)
|
65
|
+
response_urls.each do |response_url|
|
66
|
+
@urls.each do |url|
|
67
|
+
next unless response_url.host == url.host
|
68
|
+
|
69
|
+
response_uris = response_url.path.split("/").reject { |path| path.empty? }
|
70
|
+
uris = url.path.split("/").reject { |path| path.empty? }
|
71
|
+
return true if (response_uris & uris) == response_uris
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
false
|
76
|
+
end
|
77
|
+
|
46
78
|
def calculate_similarity(contents)
|
47
79
|
# NOTE: https://github.com/jpmckinney/tf-idf-similarity
|
48
80
|
most_like = {
|
@@ -18,7 +18,7 @@ module Rumors
|
|
18
18
|
private
|
19
19
|
|
20
20
|
def gql_query
|
21
|
-
|
21
|
+
<<~GQL
|
22
22
|
query($text: String) {
|
23
23
|
ListArticles(
|
24
24
|
filter: { moreLikeThis: { like: $text } }
|
@@ -29,6 +29,9 @@ module Rumors
|
|
29
29
|
node {
|
30
30
|
id
|
31
31
|
text
|
32
|
+
hyperlinks {
|
33
|
+
url
|
34
|
+
}
|
32
35
|
articleReplies {
|
33
36
|
reply {
|
34
37
|
id
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rumors-api-client
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Carol H
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-01-
|
11
|
+
date: 2019-01-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: httparty
|