google-local-results-ai-parser 0.2.3 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 62925be13b2ff224585862444f2e77bdea9f7975316b2f66253e7e2fa7a6fe34
4
- data.tar.gz: e2ec29030b6c442f955b3d4c54b21473a23f214d3db0944e2006c6ac3c7c9446
3
+ metadata.gz: 23d5956da19fd752805805c70fbd41c9b0c4b19a8b080d81a399e9efbced322c
4
+ data.tar.gz: 2ae85f484532bf47836efe3b354cb342d2672526adec07d86e8e294a2206f856
5
5
  SHA512:
6
- metadata.gz: 48054b17769730be43150502b909a82666adca5f35b2bf6fa235cf4b96da37040338eb41c62382da0320729158b3d70898efb6d595ad6cef7363d212aa4414f1
7
- data.tar.gz: af7d2196995a5991c017940af28de5816e1fc9535b1424c070670db65cf4f6df546d76d2c77ed0f64281810dda649f769e7ead071e5c99c68f43a18c4dbf89ee
6
+ metadata.gz: 77d0a15beddddc3e9324cc56c71f16393d1222cc3bab8dcb6df5becf0a0ed3d8a35bb08e4a25da0f463411ee154af1807be279046d551381a595a583f5a661d9
7
+ data.tar.gz: c2a9844133da6f5a60cfb2f499e23b265043c41ec9a0d9216cba21ce5f4f5bb1e6766e40e1f1bd6eec41d9143174d47b0e468d1c0d280470c03d2cc7a0bbfb88
@@ -2,7 +2,6 @@ require 'nokolexbor'
2
2
  require 'http'
3
3
  require 'parallel'
4
4
  require 'json'
5
- require 'pry'
6
5
 
7
6
  module GoogleLocalResultsAiParser
8
7
  DEFAULT_SERVER = 'https://api-inference.huggingface.co/models/serpapi/bert-base-local-results'.freeze
@@ -25,13 +24,13 @@ module GoogleLocalResultsAiParser
25
24
  end
26
25
 
27
26
  class << self
28
- def parse_multiple(html_parts: nil, bearer_token: nil, server: DEFAULT_SERVER, separator_regex: DEFAULT_SEPARATOR_REGEX, rejected_css: DEFAULT_REJECTED_CSS, broken_css: DEFAULT_BROKEN_CSS, iteration: DEFAULT_MAX_ITERATION)
27
+ def parse_multiple(html_parts: nil, bearer_token: nil, server: DEFAULT_SERVER, separator_regex: DEFAULT_SEPARATOR_REGEX, rejected_css: DEFAULT_REJECTED_CSS, broken_css: DEFAULT_BROKEN_CSS, iteration: DEFAULT_MAX_ITERATION, debug: false, no_cache: false)
29
28
  response_bodies = Parallel.map(html_parts, in_threads: html_parts.size) do |html|
30
- parse(html: html, bearer_token: bearer_token, server: server, separator_regex: separator_regex, rejected_css: rejected_css, broken_css: DEFAULT_BROKEN_CSS, iteration: DEFAULT_MAX_ITERATION)
29
+ parse(html: html, bearer_token: bearer_token, server: server, separator_regex: separator_regex, rejected_css: rejected_css, broken_css: broken_css, iteration: iteration, debug: debug, no_cache: no_cache)
31
30
  end
32
31
  end
33
32
 
34
- def parse(html: nil, bearer_token: nil, server: DEFAULT_SERVER, separator_regex: DEFAULT_SEPARATOR_REGEX, rejected_css: DEFAULT_REJECTED_CSS, broken_css: DEFAULT_BROKEN_CSS, iteration: DEFAULT_MAX_ITERATION)
33
+ def parse(html: nil, bearer_token: nil, server: DEFAULT_SERVER, separator_regex: DEFAULT_SEPARATOR_REGEX, rejected_css: DEFAULT_REJECTED_CSS, broken_css: DEFAULT_BROKEN_CSS, iteration: DEFAULT_MAX_ITERATION, debug: false, no_cache: false)
35
34
  doc = Nokolexbor::HTML(html)
36
35
 
37
36
  # Rejecting title, buttons, and label
@@ -47,12 +46,20 @@ module GoogleLocalResultsAiParser
47
46
  cleaned_text = split_text.map(&:strip).reject(&:empty?).flatten
48
47
 
49
48
  # Making parallel requests to server for classification
50
- results = parallel_post_requests(server, bearer_token, cleaned_text)
49
+ time_start = Time.now
50
+ results = parallel_post_requests(server, bearer_token, cleaned_text, no_cache)
51
+ time_end = Time.now
51
52
 
52
53
  # After-fix and sorting of results
53
54
  results = sort_results(results, extracted_text, unsplit_text, iteration, doc)
54
55
  final_results = transform_hash(results, unsplit_text)
55
- final_results
56
+
57
+ unless debug
58
+ final_results # Default output
59
+ else
60
+ time_taken = time_end - time_start # Time taken to make requests for debugging purposes
61
+ return final_results, time_taken
62
+ end
56
63
  end
57
64
 
58
65
  def transform_hash(results, unsplit_text)
@@ -75,6 +82,17 @@ module GoogleLocalResultsAiParser
75
82
  end
76
83
 
77
84
  def sort_results(results, extracted_text, unsplit_text, iteration, doc)
85
+ # Some endpoints return an array of hashes, whereas others
86
+ # return a wrapped version of it. The Free Inference API
87
+ # should be taken as reference since most people will
88
+ # prototype there.
89
+ results.map! do |item|
90
+ if item[:result][0].is_a?(Hash)
91
+ item[:result] = [item[:result]]
92
+ end
93
+ item
94
+ end
95
+
78
96
  # Make at most 2 iterations for after-corrections
79
97
  (0..iteration).each do |i|
80
98
  begin
@@ -540,18 +558,23 @@ module GoogleLocalResultsAiParser
540
558
 
541
559
  private
542
560
 
543
- def parallel_post_requests(server, bearer_token, inputs)
561
+ def parallel_post_requests(server, bearer_token, inputs, no_cache)
544
562
  response_bodies = Parallel.map(inputs, in_threads: inputs.size) do |input|
545
- post_request(server, bearer_token, input)
563
+ post_request(server, bearer_token, input, no_cache)
546
564
  end
547
565
 
548
566
  response_bodies
549
567
  end
550
568
 
551
- def post_request(server, bearer_token, input)
569
+ def post_request(server, bearer_token, input, no_cache)
552
570
  url = URI.parse(server)
553
- headers = { 'Authorization' => "Bearer #{bearer_token}", 'Content-Type' => 'application/json' }
554
- body = { inputs: input }.to_json
571
+ headers = unless no_cache
572
+ { 'Authorization' => "Bearer #{bearer_token}", 'Content-Type' => 'application/json' }
573
+ else
574
+ { 'Authorization' => "Bearer #{bearer_token}", 'Content-Type' => 'application/json', 'Cache-Control' => 'no-cache' } # To benchmark initial load of the model
575
+ end
576
+
577
+ body = { inputs: input, parameters: {top_k: 11}}.to_json # 11 represents the number of labels the model has
555
578
 
556
579
  response = HTTP.headers(headers).post(url, body: body)
557
580
  response_body = JSON.parse(response.body)
metadata CHANGED
@@ -1,15 +1,77 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: google-local-results-ai-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.3
4
+ version: 0.2.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Emirhan Akdeniz
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-06-21 00:00:00.000000000 Z
12
- dependencies: []
11
+ date: 2023-07-14 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokolexbor
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: http
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: parallel
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '1.20'
48
+ - - ">="
49
+ - !ruby/object:Gem::Version
50
+ version: 1.20.1
51
+ type: :runtime
52
+ prerelease: false
53
+ version_requirements: !ruby/object:Gem::Requirement
54
+ requirements:
55
+ - - "~>"
56
+ - !ruby/object:Gem::Version
57
+ version: '1.20'
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: 1.20.1
61
+ - !ruby/object:Gem::Dependency
62
+ name: json
63
+ requirement: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - ">="
66
+ - !ruby/object:Gem::Version
67
+ version: '0'
68
+ type: :runtime
69
+ prerelease: false
70
+ version_requirements: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - ">="
73
+ - !ruby/object:Gem::Version
74
+ version: '0'
13
75
  description: A gem to be used with serpapi/bert-base-local-results model to predict
14
76
  different parts of Google Local Listings. This gem uses BERT model at https://huggingface.co/serpapi/bert-base-local-results
15
77
  in the background. For serving private servers, head to https://github.com/serpapi/google-local-results-ai-server