google-local-results-ai-parser 0.2.4 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9f8076de3793dabc268bc5705443e28202655e26c5a1503498767e1b8a62ff16
4
- data.tar.gz: 7e3cf96add3d90d2e6174f9acda7f6cd8357f2f06792d46fc308aa80cc7fa93f
3
+ metadata.gz: 23d5956da19fd752805805c70fbd41c9b0c4b19a8b080d81a399e9efbced322c
4
+ data.tar.gz: 2ae85f484532bf47836efe3b354cb342d2672526adec07d86e8e294a2206f856
5
5
  SHA512:
6
- metadata.gz: 91c997a71f5765a0d43cc1c4418cecc1a83b00eb3b418f4aed6802a1c5fa78391b337384433147cf5c7eb8af5455ad07289f8440c31e58585493978d4673394a
7
- data.tar.gz: b0d1a462cba2a8097c0bc54ec0142e78b5d7daffe5c5ed4c663ddb7522c35391ee4ba9587f803a711f2e0fbe7395ab7d353254eda6ecab6212bbeed64c563b4d
6
+ metadata.gz: 77d0a15beddddc3e9324cc56c71f16393d1222cc3bab8dcb6df5becf0a0ed3d8a35bb08e4a25da0f463411ee154af1807be279046d551381a595a583f5a661d9
7
+ data.tar.gz: c2a9844133da6f5a60cfb2f499e23b265043c41ec9a0d9216cba21ce5f4f5bb1e6766e40e1f1bd6eec41d9143174d47b0e468d1c0d280470c03d2cc7a0bbfb88
@@ -24,13 +24,13 @@ module GoogleLocalResultsAiParser
24
24
  end
25
25
 
26
26
  class << self
27
- def parse_multiple(html_parts: nil, bearer_token: nil, server: DEFAULT_SERVER, separator_regex: DEFAULT_SEPARATOR_REGEX, rejected_css: DEFAULT_REJECTED_CSS, broken_css: DEFAULT_BROKEN_CSS, iteration: DEFAULT_MAX_ITERATION)
27
+ def parse_multiple(html_parts: nil, bearer_token: nil, server: DEFAULT_SERVER, separator_regex: DEFAULT_SEPARATOR_REGEX, rejected_css: DEFAULT_REJECTED_CSS, broken_css: DEFAULT_BROKEN_CSS, iteration: DEFAULT_MAX_ITERATION, debug: false, no_cache: false)
28
28
  response_bodies = Parallel.map(html_parts, in_threads: html_parts.size) do |html|
29
- parse(html: html, bearer_token: bearer_token, server: server, separator_regex: separator_regex, rejected_css: rejected_css, broken_css: DEFAULT_BROKEN_CSS, iteration: DEFAULT_MAX_ITERATION)
29
+ parse(html: html, bearer_token: bearer_token, server: server, separator_regex: separator_regex, rejected_css: rejected_css, broken_css: broken_css, iteration: iteration, debug: debug, no_cache: no_cache)
30
30
  end
31
31
  end
32
32
 
33
- def parse(html: nil, bearer_token: nil, server: DEFAULT_SERVER, separator_regex: DEFAULT_SEPARATOR_REGEX, rejected_css: DEFAULT_REJECTED_CSS, broken_css: DEFAULT_BROKEN_CSS, iteration: DEFAULT_MAX_ITERATION)
33
+ def parse(html: nil, bearer_token: nil, server: DEFAULT_SERVER, separator_regex: DEFAULT_SEPARATOR_REGEX, rejected_css: DEFAULT_REJECTED_CSS, broken_css: DEFAULT_BROKEN_CSS, iteration: DEFAULT_MAX_ITERATION, debug: false, no_cache: false)
34
34
  doc = Nokolexbor::HTML(html)
35
35
 
36
36
  # Rejecting title, buttons, and label
@@ -46,12 +46,20 @@ module GoogleLocalResultsAiParser
46
46
  cleaned_text = split_text.map(&:strip).reject(&:empty?).flatten
47
47
 
48
48
  # Making parallel requests to server for classification
49
- results = parallel_post_requests(server, bearer_token, cleaned_text)
49
+ time_start = Time.now
50
+ results = parallel_post_requests(server, bearer_token, cleaned_text, no_cache)
51
+ time_end = Time.now
50
52
 
51
53
  # After-fix and sorting of results
52
54
  results = sort_results(results, extracted_text, unsplit_text, iteration, doc)
53
55
  final_results = transform_hash(results, unsplit_text)
54
- final_results
56
+
57
+ unless debug
58
+ final_results # Default output
59
+ else
60
+ time_taken = time_end - time_start # Time taken to make requests for debugging purposes
61
+ return final_results, time_taken
62
+ end
55
63
  end
56
64
 
57
65
  def transform_hash(results, unsplit_text)
@@ -74,6 +82,17 @@ module GoogleLocalResultsAiParser
74
82
  end
75
83
 
76
84
  def sort_results(results, extracted_text, unsplit_text, iteration, doc)
85
+ # Some endpoints return an array of hashes, whereas others
86
+ # return a wrapped version of this. The Free Inference API
87
+ # should be taken as reference since most people will
88
+ # prototype there.
89
+ results.map! do |item|
90
+ if item[:result][0].is_a?(Hash)
91
+ item[:result] = [item[:result]]
92
+ end
93
+ item
94
+ end
95
+
77
96
  # Make at most 2 iterations for after-corrections
78
97
  (0..iteration).each do |i|
79
98
  begin
@@ -475,8 +494,7 @@ module GoogleLocalResultsAiParser
475
494
  end
476
495
  end
477
496
 
478
- already_a_service_option = caught_results_indices.all? {|index| results[index][:result][0][0]["label"] == "service options"}
479
- return results, label_order, duplicates if already_a_service_option
497
+ return results, label_order, duplicates if not_service_option_duplicates == []
480
498
  # Zero out the `type` or `description`, and put it to last position
481
499
  caught_results_indices.each do |caught_index|
482
500
  service_options_hash = results[caught_index][:result][0].find {|hash| hash["label"] == "service options" }
@@ -540,18 +558,23 @@ module GoogleLocalResultsAiParser
540
558
 
541
559
  private
542
560
 
543
- def parallel_post_requests(server, bearer_token, inputs)
561
+ def parallel_post_requests(server, bearer_token, inputs, no_cache)
544
562
  response_bodies = Parallel.map(inputs, in_threads: inputs.size) do |input|
545
- post_request(server, bearer_token, input)
563
+ post_request(server, bearer_token, input, no_cache)
546
564
  end
547
565
 
548
566
  response_bodies
549
567
  end
550
568
 
551
- def post_request(server, bearer_token, input)
569
+ def post_request(server, bearer_token, input, no_cache)
552
570
  url = URI.parse(server)
553
- headers = { 'Authorization' => "Bearer #{bearer_token}", 'Content-Type' => 'application/json' }
554
- body = { inputs: input }.to_json
571
+ headers = unless no_cache
572
+ { 'Authorization' => "Bearer #{bearer_token}", 'Content-Type' => 'application/json' }
573
+ else
574
+ { 'Authorization' => "Bearer #{bearer_token}", 'Content-Type' => 'application/json', 'Cache-Control' => 'no-cache' } # To benchmark initial load of the model
575
+ end
576
+
577
+ body = { inputs: input, parameters: {top_k: 11}}.to_json # 11 represents the number of labels the model has
555
578
 
556
579
  response = HTTP.headers(headers).post(url, body: body)
557
580
  response_body = JSON.parse(response.body)
metadata CHANGED
@@ -1,15 +1,77 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: google-local-results-ai-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.4
4
+ version: 0.2.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Emirhan Akdeniz
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-06-21 00:00:00.000000000 Z
12
- dependencies: []
11
+ date: 2023-07-14 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokolexbor
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: http
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: parallel
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '1.20'
48
+ - - ">="
49
+ - !ruby/object:Gem::Version
50
+ version: 1.20.1
51
+ type: :runtime
52
+ prerelease: false
53
+ version_requirements: !ruby/object:Gem::Requirement
54
+ requirements:
55
+ - - "~>"
56
+ - !ruby/object:Gem::Version
57
+ version: '1.20'
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: 1.20.1
61
+ - !ruby/object:Gem::Dependency
62
+ name: json
63
+ requirement: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - ">="
66
+ - !ruby/object:Gem::Version
67
+ version: '0'
68
+ type: :runtime
69
+ prerelease: false
70
+ version_requirements: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - ">="
73
+ - !ruby/object:Gem::Version
74
+ version: '0'
13
75
  description: A gem to be used with serpapi/bert-base-local-results model to predict
14
76
  different parts of Google Local Listings. This gem uses BERT model at https://huggingface.co/serpapi/bert-base-local-results
15
77
  in the background. For serving private servers, head to https://github.com/serpapi/google-local-results-ai-server