google-local-results-ai-parser 0.2.4 → 0.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/google-local-results-ai-parser.rb +39 -16
- metadata +65 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6935f20f427cc87b991af4c49d4db7d52004303837e075f20c73e933c5f77d90
|
4
|
+
data.tar.gz: 5be934605c20bcfea761f148ae3574315f56d3d2e9ea19307f891e8bc0ab7938
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d34c7e63db0ed905053b98c68888f2830fdbdc1e2d2117d7353df8df2a704b66beb72dd8940f44224ceaebf6d643d8df1dce67c56a5864ed56f99d2ba6c13f78
|
7
|
+
data.tar.gz: 10453d1c6f06571a0d81f3c6b8bbd96659764c230d44cec898b8f8832f317d22bfa661b04c0cf4cc091f16033f68f44beb56a20505419738e1653d9d57d59a90
|
@@ -24,13 +24,13 @@ module GoogleLocalResultsAiParser
|
|
24
24
|
end
|
25
25
|
|
26
26
|
class << self
|
27
|
-
def parse_multiple(html_parts: nil, bearer_token: nil, server: DEFAULT_SERVER, separator_regex: DEFAULT_SEPARATOR_REGEX, rejected_css: DEFAULT_REJECTED_CSS, broken_css: DEFAULT_BROKEN_CSS, iteration: DEFAULT_MAX_ITERATION)
|
27
|
+
def parse_multiple(html_parts: nil, bearer_token: nil, server: DEFAULT_SERVER, separator_regex: DEFAULT_SEPARATOR_REGEX, rejected_css: DEFAULT_REJECTED_CSS, broken_css: DEFAULT_BROKEN_CSS, iteration: DEFAULT_MAX_ITERATION, debug: false, no_cache: false)
|
28
28
|
response_bodies = Parallel.map(html_parts, in_threads: html_parts.size) do |html|
|
29
|
-
parse(html: html, bearer_token: bearer_token, server: server, separator_regex: separator_regex, rejected_css: rejected_css, broken_css:
|
29
|
+
parse(html: html, bearer_token: bearer_token, server: server, separator_regex: separator_regex, rejected_css: rejected_css, broken_css: broken_css, iteration: iteration, debug: debug, no_cache: no_cache)
|
30
30
|
end
|
31
31
|
end
|
32
32
|
|
33
|
-
def parse(html: nil, bearer_token: nil, server: DEFAULT_SERVER, separator_regex: DEFAULT_SEPARATOR_REGEX, rejected_css: DEFAULT_REJECTED_CSS, broken_css: DEFAULT_BROKEN_CSS, iteration: DEFAULT_MAX_ITERATION)
|
33
|
+
def parse(html: nil, bearer_token: nil, server: DEFAULT_SERVER, separator_regex: DEFAULT_SEPARATOR_REGEX, rejected_css: DEFAULT_REJECTED_CSS, broken_css: DEFAULT_BROKEN_CSS, iteration: DEFAULT_MAX_ITERATION, debug: false, no_cache: false)
|
34
34
|
doc = Nokolexbor::HTML(html)
|
35
35
|
|
36
36
|
# Rejecting title, buttons, and label
|
@@ -46,12 +46,20 @@ module GoogleLocalResultsAiParser
|
|
46
46
|
cleaned_text = split_text.map(&:strip).reject(&:empty?).flatten
|
47
47
|
|
48
48
|
# Making parallel requests to server for classification
|
49
|
-
|
49
|
+
time_start = Time.now
|
50
|
+
results = parallel_post_requests(server, bearer_token, cleaned_text, no_cache)
|
51
|
+
time_end = Time.now
|
50
52
|
|
51
53
|
# After-fix and sorting of results
|
52
54
|
results = sort_results(results, extracted_text, unsplit_text, iteration, doc)
|
53
55
|
final_results = transform_hash(results, unsplit_text)
|
54
|
-
|
56
|
+
|
57
|
+
unless debug
|
58
|
+
final_results # Default output
|
59
|
+
else
|
60
|
+
time_taken = time_end - time_start # Time taken to make requests for debugging purposes
|
61
|
+
return final_results, time_taken
|
62
|
+
end
|
55
63
|
end
|
56
64
|
|
57
65
|
def transform_hash(results, unsplit_text)
|
@@ -74,6 +82,17 @@ module GoogleLocalResultsAiParser
|
|
74
82
|
end
|
75
83
|
|
76
84
|
def sort_results(results, extracted_text, unsplit_text, iteration, doc)
|
85
|
+
# Some endpoints load array of hashes whereas some of them
|
86
|
+
# load a wrapped version of this. The Free Inference API
|
87
|
+
# should be taken as reference since most people will
|
88
|
+
# prototype there.
|
89
|
+
results.map! do |item|
|
90
|
+
if item[:result][0].is_a?(Hash)
|
91
|
+
item[:result] = [item[:result]]
|
92
|
+
end
|
93
|
+
item
|
94
|
+
end
|
95
|
+
|
77
96
|
# Make at most 2 iterations for after-corrections
|
78
97
|
(0..iteration).each do |i|
|
79
98
|
begin
|
@@ -464,19 +483,17 @@ module GoogleLocalResultsAiParser
|
|
464
483
|
"Online estimates not available",
|
465
484
|
"Takeaway"
|
466
485
|
]
|
467
|
-
caught_results_indices = results.map.with_index {|result, index| index if known_errors.include?(result[:input])}.compact
|
486
|
+
caught_results_indices = results.map.with_index {|result, index| index if known_errors.include?(result[:input]) && result[:result][0][0]["label"] != "service options"}.compact
|
468
487
|
|
469
488
|
not_service_option_duplicates = []
|
470
489
|
caught_results_indices.each do |caught_index|
|
471
490
|
duplicates.each.with_index do |duplicate, duplicate_index|
|
472
|
-
if duplicate.include?(caught_index)
|
491
|
+
if duplicate.include?(caught_index)
|
473
492
|
not_service_option_duplicates << duplicate_index
|
474
493
|
end
|
475
494
|
end
|
476
495
|
end
|
477
496
|
|
478
|
-
already_a_service_option = caught_results_indices.all? {|index| results[index][:result][0][0]["label"] == "service options"}
|
479
|
-
return results, label_order, duplicates if already_a_service_option
|
480
497
|
# Zero out the `type` or `description`, and put it to last position
|
481
498
|
caught_results_indices.each do |caught_index|
|
482
499
|
service_options_hash = results[caught_index][:result][0].find {|hash| hash["label"] == "service options" }
|
@@ -487,10 +504,11 @@ module GoogleLocalResultsAiParser
|
|
487
504
|
old_result_hash["score"] = 0.0
|
488
505
|
results[caught_index][:result][0] << old_result_hash
|
489
506
|
end
|
490
|
-
|
507
|
+
|
491
508
|
# Rearranging `label_order`
|
492
|
-
caught_results_indices.each {|caught_index| label_order[caught_index] = "
|
509
|
+
caught_results_indices.each {|caught_index| label_order[caught_index] = "service options"}
|
493
510
|
|
511
|
+
return results, label_order, duplicates if not_service_option_duplicates == []
|
494
512
|
# Rearranging duplicates
|
495
513
|
not_service_option_duplicates.each do |not_service_option_duplicate|
|
496
514
|
last_item = duplicates[not_service_option_duplicate][-1]
|
@@ -540,18 +558,23 @@ module GoogleLocalResultsAiParser
|
|
540
558
|
|
541
559
|
private
|
542
560
|
|
543
|
-
def parallel_post_requests(server, bearer_token, inputs)
|
561
|
+
def parallel_post_requests(server, bearer_token, inputs, no_cache)
|
544
562
|
response_bodies = Parallel.map(inputs, in_threads: inputs.size) do |input|
|
545
|
-
post_request(server, bearer_token, input)
|
563
|
+
post_request(server, bearer_token, input, no_cache)
|
546
564
|
end
|
547
565
|
|
548
566
|
response_bodies
|
549
567
|
end
|
550
568
|
|
551
|
-
def post_request(server, bearer_token, input)
|
569
|
+
def post_request(server, bearer_token, input, no_cache)
|
552
570
|
url = URI.parse(server)
|
553
|
-
headers =
|
554
|
-
|
571
|
+
headers = unless no_cache
|
572
|
+
{ 'Authorization' => "Bearer #{bearer_token}", 'Content-Type' => 'application/json' }
|
573
|
+
else
|
574
|
+
{ 'Authorization' => "Bearer #{bearer_token}", 'Content-Type' => 'application/json', 'Cache-Control' => 'no-cache' } # To benchmark initial load of the model
|
575
|
+
end
|
576
|
+
|
577
|
+
body = { inputs: input, parameters: {top_k: 11}}.to_json # 11 represents the number of labels the model has
|
555
578
|
|
556
579
|
response = HTTP.headers(headers).post(url, body: body)
|
557
580
|
response_body = JSON.parse(response.body)
|
metadata
CHANGED
@@ -1,15 +1,77 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: google-local-results-ai-parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.4
|
4
|
+
version: 0.2.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Emirhan Akdeniz
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
12
|
-
dependencies:
|
11
|
+
date: 2023-07-14 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: nokolexbor
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: http
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: parallel
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '1.20'
|
48
|
+
- - ">="
|
49
|
+
- !ruby/object:Gem::Version
|
50
|
+
version: 1.20.1
|
51
|
+
type: :runtime
|
52
|
+
prerelease: false
|
53
|
+
version_requirements: !ruby/object:Gem::Requirement
|
54
|
+
requirements:
|
55
|
+
- - "~>"
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
version: '1.20'
|
58
|
+
- - ">="
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
version: 1.20.1
|
61
|
+
- !ruby/object:Gem::Dependency
|
62
|
+
name: json
|
63
|
+
requirement: !ruby/object:Gem::Requirement
|
64
|
+
requirements:
|
65
|
+
- - ">="
|
66
|
+
- !ruby/object:Gem::Version
|
67
|
+
version: '0'
|
68
|
+
type: :runtime
|
69
|
+
prerelease: false
|
70
|
+
version_requirements: !ruby/object:Gem::Requirement
|
71
|
+
requirements:
|
72
|
+
- - ">="
|
73
|
+
- !ruby/object:Gem::Version
|
74
|
+
version: '0'
|
13
75
|
description: A gem to be used with serpapi/bert-base-local-results model to predict
|
14
76
|
different parts of Google Local Listings. This gem uses BERT model at https://huggingface.co/serpapi/bert-base-local-results
|
15
77
|
in the background. For serving private servers, head to https://github.com/serpapi/google-local-results-ai-server
|