google-local-results-ai-parser 0.2.4 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/google-local-results-ai-parser.rb +35 -12
- metadata +65 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 23d5956da19fd752805805c70fbd41c9b0c4b19a8b080d81a399e9efbced322c
|
4
|
+
data.tar.gz: 2ae85f484532bf47836efe3b354cb342d2672526adec07d86e8e294a2206f856
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 77d0a15beddddc3e9324cc56c71f16393d1222cc3bab8dcb6df5becf0a0ed3d8a35bb08e4a25da0f463411ee154af1807be279046d551381a595a583f5a661d9
|
7
|
+
data.tar.gz: c2a9844133da6f5a60cfb2f499e23b265043c41ec9a0d9216cba21ce5f4f5bb1e6766e40e1f1bd6eec41d9143174d47b0e468d1c0d280470c03d2cc7a0bbfb88
|
@@ -24,13 +24,13 @@ module GoogleLocalResultsAiParser
|
|
24
24
|
end
|
25
25
|
|
26
26
|
class << self
|
27
|
-
def parse_multiple(html_parts: nil, bearer_token: nil, server: DEFAULT_SERVER, separator_regex: DEFAULT_SEPARATOR_REGEX, rejected_css: DEFAULT_REJECTED_CSS, broken_css: DEFAULT_BROKEN_CSS, iteration: DEFAULT_MAX_ITERATION)
|
27
|
+
def parse_multiple(html_parts: nil, bearer_token: nil, server: DEFAULT_SERVER, separator_regex: DEFAULT_SEPARATOR_REGEX, rejected_css: DEFAULT_REJECTED_CSS, broken_css: DEFAULT_BROKEN_CSS, iteration: DEFAULT_MAX_ITERATION, debug: false, no_cache: false)
|
28
28
|
response_bodies = Parallel.map(html_parts, in_threads: html_parts.size) do |html|
|
29
|
-
parse(html: html, bearer_token: bearer_token, server: server, separator_regex: separator_regex, rejected_css: rejected_css, broken_css: broken_css, iteration: iteration)
|
29
|
+
parse(html: html, bearer_token: bearer_token, server: server, separator_regex: separator_regex, rejected_css: rejected_css, broken_css: broken_css, iteration: iteration, debug: debug, no_cache: no_cache)
|
30
30
|
end
|
31
31
|
end
|
32
32
|
|
33
|
-
def parse(html: nil, bearer_token: nil, server: DEFAULT_SERVER, separator_regex: DEFAULT_SEPARATOR_REGEX, rejected_css: DEFAULT_REJECTED_CSS, broken_css: DEFAULT_BROKEN_CSS, iteration: DEFAULT_MAX_ITERATION)
|
33
|
+
def parse(html: nil, bearer_token: nil, server: DEFAULT_SERVER, separator_regex: DEFAULT_SEPARATOR_REGEX, rejected_css: DEFAULT_REJECTED_CSS, broken_css: DEFAULT_BROKEN_CSS, iteration: DEFAULT_MAX_ITERATION, debug: false, no_cache: false)
|
34
34
|
doc = Nokolexbor::HTML(html)
|
35
35
|
|
36
36
|
# Rejecting title, buttons, and label
|
@@ -46,12 +46,20 @@ module GoogleLocalResultsAiParser
|
|
46
46
|
cleaned_text = split_text.map(&:strip).reject(&:empty?).flatten
|
47
47
|
|
48
48
|
# Making parallel requests to server for classification
|
49
|
-
|
49
|
+
time_start = Time.now
|
50
|
+
results = parallel_post_requests(server, bearer_token, cleaned_text, no_cache)
|
51
|
+
time_end = Time.now
|
50
52
|
|
51
53
|
# After-fix and sorting of results
|
52
54
|
results = sort_results(results, extracted_text, unsplit_text, iteration, doc)
|
53
55
|
final_results = transform_hash(results, unsplit_text)
|
54
|
-
|
56
|
+
|
57
|
+
unless debug
|
58
|
+
final_results # Default output
|
59
|
+
else
|
60
|
+
time_taken = time_end - time_start # Time taken to make requests for debugging purposes
|
61
|
+
return final_results, time_taken
|
62
|
+
end
|
55
63
|
end
|
56
64
|
|
57
65
|
def transform_hash(results, unsplit_text)
|
@@ -74,6 +82,17 @@ module GoogleLocalResultsAiParser
|
|
74
82
|
end
|
75
83
|
|
76
84
|
def sort_results(results, extracted_text, unsplit_text, iteration, doc)
|
85
|
+
# Some endpoints load array of hashes whereas some of them
|
86
|
+
# load a wrapped version of this. The Free Inference API
|
87
|
+
# should be taken as reference since most people will
|
88
|
+
# prototype there.
|
89
|
+
results.map! do |item|
|
90
|
+
if item[:result][0].is_a?(Hash)
|
91
|
+
item[:result] = [item[:result]]
|
92
|
+
end
|
93
|
+
item
|
94
|
+
end
|
95
|
+
|
77
96
|
# Make at most 2 iterations for after-corrections
|
78
97
|
(0..iteration).each do |i|
|
79
98
|
begin
|
@@ -475,8 +494,7 @@ module GoogleLocalResultsAiParser
|
|
475
494
|
end
|
476
495
|
end
|
477
496
|
|
478
|
-
|
479
|
-
return results, label_order, duplicates if already_a_service_option
|
497
|
+
return results, label_order, duplicates if not_service_option_duplicates == []
|
480
498
|
# Zero out the `type` or `description`, and put it to last position
|
481
499
|
caught_results_indices.each do |caught_index|
|
482
500
|
service_options_hash = results[caught_index][:result][0].find {|hash| hash["label"] == "service options" }
|
@@ -540,18 +558,23 @@ module GoogleLocalResultsAiParser
|
|
540
558
|
|
541
559
|
private
|
542
560
|
|
543
|
-
def parallel_post_requests(server, bearer_token, inputs)
|
561
|
+
def parallel_post_requests(server, bearer_token, inputs, no_cache)
|
544
562
|
response_bodies = Parallel.map(inputs, in_threads: inputs.size) do |input|
|
545
|
-
post_request(server, bearer_token, input)
|
563
|
+
post_request(server, bearer_token, input, no_cache)
|
546
564
|
end
|
547
565
|
|
548
566
|
response_bodies
|
549
567
|
end
|
550
568
|
|
551
|
-
def post_request(server, bearer_token, input)
|
569
|
+
def post_request(server, bearer_token, input, no_cache)
|
552
570
|
url = URI.parse(server)
|
553
|
-
headers =
|
554
|
-
|
571
|
+
headers = unless no_cache
|
572
|
+
{ 'Authorization' => "Bearer #{bearer_token}", 'Content-Type' => 'application/json' }
|
573
|
+
else
|
574
|
+
{ 'Authorization' => "Bearer #{bearer_token}", 'Content-Type' => 'application/json', 'Cache-Control' => 'no-cache' } # To benchmark initial load of the model
|
575
|
+
end
|
576
|
+
|
577
|
+
body = { inputs: input, parameters: {top_k: 11}}.to_json # 11 represents the number of labels the model has
|
555
578
|
|
556
579
|
response = HTTP.headers(headers).post(url, body: body)
|
557
580
|
response_body = JSON.parse(response.body)
|
metadata
CHANGED
@@ -1,15 +1,77 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: google-local-results-ai-parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.4
|
4
|
+
version: 0.2.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Emirhan Akdeniz
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
12
|
-
dependencies:
|
11
|
+
date: 2023-07-14 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: nokolexbor
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: http
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: parallel
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '1.20'
|
48
|
+
- - ">="
|
49
|
+
- !ruby/object:Gem::Version
|
50
|
+
version: 1.20.1
|
51
|
+
type: :runtime
|
52
|
+
prerelease: false
|
53
|
+
version_requirements: !ruby/object:Gem::Requirement
|
54
|
+
requirements:
|
55
|
+
- - "~>"
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
version: '1.20'
|
58
|
+
- - ">="
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
version: 1.20.1
|
61
|
+
- !ruby/object:Gem::Dependency
|
62
|
+
name: json
|
63
|
+
requirement: !ruby/object:Gem::Requirement
|
64
|
+
requirements:
|
65
|
+
- - ">="
|
66
|
+
- !ruby/object:Gem::Version
|
67
|
+
version: '0'
|
68
|
+
type: :runtime
|
69
|
+
prerelease: false
|
70
|
+
version_requirements: !ruby/object:Gem::Requirement
|
71
|
+
requirements:
|
72
|
+
- - ">="
|
73
|
+
- !ruby/object:Gem::Version
|
74
|
+
version: '0'
|
13
75
|
description: A gem to be used with serpapi/bert-base-local-results model to predict
|
14
76
|
different parts of Google Local Listings. This gem uses BERT model at https://huggingface.co/serpapi/bert-base-local-results
|
15
77
|
in the background. For serving private servers, head to https://github.com/serpapi/google-local-results-ai-server
|