google-local-results-ai-parser 0.2.4 → 0.2.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9f8076de3793dabc268bc5705443e28202655e26c5a1503498767e1b8a62ff16
4
- data.tar.gz: 7e3cf96add3d90d2e6174f9acda7f6cd8357f2f06792d46fc308aa80cc7fa93f
3
+ metadata.gz: 23d5956da19fd752805805c70fbd41c9b0c4b19a8b080d81a399e9efbced322c
4
+ data.tar.gz: 2ae85f484532bf47836efe3b354cb342d2672526adec07d86e8e294a2206f856
5
5
  SHA512:
6
- metadata.gz: 91c997a71f5765a0d43cc1c4418cecc1a83b00eb3b418f4aed6802a1c5fa78391b337384433147cf5c7eb8af5455ad07289f8440c31e58585493978d4673394a
7
- data.tar.gz: b0d1a462cba2a8097c0bc54ec0142e78b5d7daffe5c5ed4c663ddb7522c35391ee4ba9587f803a711f2e0fbe7395ab7d353254eda6ecab6212bbeed64c563b4d
6
+ metadata.gz: 77d0a15beddddc3e9324cc56c71f16393d1222cc3bab8dcb6df5becf0a0ed3d8a35bb08e4a25da0f463411ee154af1807be279046d551381a595a583f5a661d9
7
+ data.tar.gz: c2a9844133da6f5a60cfb2f499e23b265043c41ec9a0d9216cba21ce5f4f5bb1e6766e40e1f1bd6eec41d9143174d47b0e468d1c0d280470c03d2cc7a0bbfb88
@@ -24,13 +24,13 @@ module GoogleLocalResultsAiParser
24
24
  end
25
25
 
26
26
  class << self
27
- def parse_multiple(html_parts: nil, bearer_token: nil, server: DEFAULT_SERVER, separator_regex: DEFAULT_SEPARATOR_REGEX, rejected_css: DEFAULT_REJECTED_CSS, broken_css: DEFAULT_BROKEN_CSS, iteration: DEFAULT_MAX_ITERATION)
27
+ def parse_multiple(html_parts: nil, bearer_token: nil, server: DEFAULT_SERVER, separator_regex: DEFAULT_SEPARATOR_REGEX, rejected_css: DEFAULT_REJECTED_CSS, broken_css: DEFAULT_BROKEN_CSS, iteration: DEFAULT_MAX_ITERATION, debug: false, no_cache: false)
28
28
  response_bodies = Parallel.map(html_parts, in_threads: html_parts.size) do |html|
29
- parse(html: html, bearer_token: bearer_token, server: server, separator_regex: separator_regex, rejected_css: rejected_css, broken_css: DEFAULT_BROKEN_CSS, iteration: DEFAULT_MAX_ITERATION)
29
+ parse(html: html, bearer_token: bearer_token, server: server, separator_regex: separator_regex, rejected_css: rejected_css, broken_css: broken_css, iteration: iteration, debug: debug, no_cache: no_cache)
30
30
  end
31
31
  end
32
32
 
33
- def parse(html: nil, bearer_token: nil, server: DEFAULT_SERVER, separator_regex: DEFAULT_SEPARATOR_REGEX, rejected_css: DEFAULT_REJECTED_CSS, broken_css: DEFAULT_BROKEN_CSS, iteration: DEFAULT_MAX_ITERATION)
33
+ def parse(html: nil, bearer_token: nil, server: DEFAULT_SERVER, separator_regex: DEFAULT_SEPARATOR_REGEX, rejected_css: DEFAULT_REJECTED_CSS, broken_css: DEFAULT_BROKEN_CSS, iteration: DEFAULT_MAX_ITERATION, debug: false, no_cache: false)
34
34
  doc = Nokolexbor::HTML(html)
35
35
 
36
36
  # Rejecting title, buttons, and label
@@ -46,12 +46,20 @@ module GoogleLocalResultsAiParser
46
46
  cleaned_text = split_text.map(&:strip).reject(&:empty?).flatten
47
47
 
48
48
  # Making parallel requests to server for classification
49
- results = parallel_post_requests(server, bearer_token, cleaned_text)
49
+ time_start = Time.now
50
+ results = parallel_post_requests(server, bearer_token, cleaned_text, no_cache)
51
+ time_end = Time.now
50
52
 
51
53
  # After-fix and sorting of results
52
54
  results = sort_results(results, extracted_text, unsplit_text, iteration, doc)
53
55
  final_results = transform_hash(results, unsplit_text)
54
- final_results
56
+
57
+ unless debug
58
+ final_results # Default output
59
+ else
60
+ time_taken = time_end - time_start # Time taken to make requests, for debugging purposes
61
+ return final_results, time_taken
62
+ end
55
63
  end
56
64
 
57
65
  def transform_hash(results, unsplit_text)
@@ -74,6 +82,17 @@ module GoogleLocalResultsAiParser
74
82
  end
75
83
 
76
84
  def sort_results(results, extracted_text, unsplit_text, iteration, doc)
85
+ # Some endpoints return an array of hashes, whereas others
86
+ # return a wrapped version of it. The Free Inference API
87
+ # should be taken as the reference, since most people will
88
+ # prototype there.
89
+ results.map! do |item|
90
+ if item[:result][0].is_a?(Hash)
91
+ item[:result] = [item[:result]]
92
+ end
93
+ item
94
+ end
95
+
77
96
  # Make at most 2 iterations for after-corrections
78
97
  (0..iteration).each do |i|
79
98
  begin
@@ -475,8 +494,7 @@ module GoogleLocalResultsAiParser
475
494
  end
476
495
  end
477
496
 
478
- already_a_service_option = caught_results_indices.all? {|index| results[index][:result][0][0]["label"] == "service options"}
479
- return results, label_order, duplicates if already_a_service_option
497
+ return results, label_order, duplicates if not_service_option_duplicates == []
480
498
  # Zero out the `type` or `description`, and put it to last position
481
499
  caught_results_indices.each do |caught_index|
482
500
  service_options_hash = results[caught_index][:result][0].find {|hash| hash["label"] == "service options" }
@@ -540,18 +558,23 @@ module GoogleLocalResultsAiParser
540
558
 
541
559
  private
542
560
 
543
- def parallel_post_requests(server, bearer_token, inputs)
561
+ def parallel_post_requests(server, bearer_token, inputs, no_cache)
544
562
  response_bodies = Parallel.map(inputs, in_threads: inputs.size) do |input|
545
- post_request(server, bearer_token, input)
563
+ post_request(server, bearer_token, input, no_cache)
546
564
  end
547
565
 
548
566
  response_bodies
549
567
  end
550
568
 
551
- def post_request(server, bearer_token, input)
569
+ def post_request(server, bearer_token, input, no_cache)
552
570
  url = URI.parse(server)
553
- headers = { 'Authorization' => "Bearer #{bearer_token}", 'Content-Type' => 'application/json' }
554
- body = { inputs: input }.to_json
571
+ headers = unless no_cache
572
+ { 'Authorization' => "Bearer #{bearer_token}", 'Content-Type' => 'application/json' }
573
+ else
574
+ { 'Authorization' => "Bearer #{bearer_token}", 'Content-Type' => 'application/json', 'Cache-Control' => 'no-cache' } # To benchmark initial load of the model
575
+ end
576
+
577
+ body = { inputs: input, parameters: {top_k: 11}}.to_json # 11 represents the number of labels the model has
555
578
 
556
579
  response = HTTP.headers(headers).post(url, body: body)
557
580
  response_body = JSON.parse(response.body)
metadata CHANGED
@@ -1,15 +1,77 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: google-local-results-ai-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.4
4
+ version: 0.2.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Emirhan Akdeniz
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-06-21 00:00:00.000000000 Z
12
- dependencies: []
11
+ date: 2023-07-14 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokolexbor
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: http
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: parallel
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '1.20'
48
+ - - ">="
49
+ - !ruby/object:Gem::Version
50
+ version: 1.20.1
51
+ type: :runtime
52
+ prerelease: false
53
+ version_requirements: !ruby/object:Gem::Requirement
54
+ requirements:
55
+ - - "~>"
56
+ - !ruby/object:Gem::Version
57
+ version: '1.20'
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: 1.20.1
61
+ - !ruby/object:Gem::Dependency
62
+ name: json
63
+ requirement: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - ">="
66
+ - !ruby/object:Gem::Version
67
+ version: '0'
68
+ type: :runtime
69
+ prerelease: false
70
+ version_requirements: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - ">="
73
+ - !ruby/object:Gem::Version
74
+ version: '0'
13
75
  description: A gem to be used with serpapi/bert-base-local-results model to predict
14
76
  different parts of Google Local Listings. This gem uses BERT model at https://huggingface.co/serpapi/bert-base-local-results
15
77
  in the background. For serving private servers, head to https://github.com/serpapi/google-local-results-ai-server