google-local-results-ai-parser 0.2.3 → 0.2.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 62925be13b2ff224585862444f2e77bdea9f7975316b2f66253e7e2fa7a6fe34
4
- data.tar.gz: e2ec29030b6c442f955b3d4c54b21473a23f214d3db0944e2006c6ac3c7c9446
3
+ metadata.gz: 23d5956da19fd752805805c70fbd41c9b0c4b19a8b080d81a399e9efbced322c
4
+ data.tar.gz: 2ae85f484532bf47836efe3b354cb342d2672526adec07d86e8e294a2206f856
5
5
  SHA512:
6
- metadata.gz: 48054b17769730be43150502b909a82666adca5f35b2bf6fa235cf4b96da37040338eb41c62382da0320729158b3d70898efb6d595ad6cef7363d212aa4414f1
7
- data.tar.gz: af7d2196995a5991c017940af28de5816e1fc9535b1424c070670db65cf4f6df546d76d2c77ed0f64281810dda649f769e7ead071e5c99c68f43a18c4dbf89ee
6
+ metadata.gz: 77d0a15beddddc3e9324cc56c71f16393d1222cc3bab8dcb6df5becf0a0ed3d8a35bb08e4a25da0f463411ee154af1807be279046d551381a595a583f5a661d9
7
+ data.tar.gz: c2a9844133da6f5a60cfb2f499e23b265043c41ec9a0d9216cba21ce5f4f5bb1e6766e40e1f1bd6eec41d9143174d47b0e468d1c0d280470c03d2cc7a0bbfb88
@@ -2,7 +2,6 @@ require 'nokolexbor'
2
2
  require 'http'
3
3
  require 'parallel'
4
4
  require 'json'
5
- require 'pry'
6
5
 
7
6
  module GoogleLocalResultsAiParser
8
7
  DEFAULT_SERVER = 'https://api-inference.huggingface.co/models/serpapi/bert-base-local-results'.freeze
@@ -25,13 +24,13 @@ module GoogleLocalResultsAiParser
25
24
  end
26
25
 
27
26
  class << self
28
- def parse_multiple(html_parts: nil, bearer_token: nil, server: DEFAULT_SERVER, separator_regex: DEFAULT_SEPARATOR_REGEX, rejected_css: DEFAULT_REJECTED_CSS, broken_css: DEFAULT_BROKEN_CSS, iteration: DEFAULT_MAX_ITERATION)
27
+ def parse_multiple(html_parts: nil, bearer_token: nil, server: DEFAULT_SERVER, separator_regex: DEFAULT_SEPARATOR_REGEX, rejected_css: DEFAULT_REJECTED_CSS, broken_css: DEFAULT_BROKEN_CSS, iteration: DEFAULT_MAX_ITERATION, debug: false, no_cache: false)
29
28
  response_bodies = Parallel.map(html_parts, in_threads: html_parts.size) do |html|
30
- parse(html: html, bearer_token: bearer_token, server: server, separator_regex: separator_regex, rejected_css: rejected_css, broken_css: DEFAULT_BROKEN_CSS, iteration: DEFAULT_MAX_ITERATION)
29
+ parse(html: html, bearer_token: bearer_token, server: server, separator_regex: separator_regex, rejected_css: rejected_css, broken_css: broken_css, iteration: iteration, debug: debug, no_cache: no_cache)
31
30
  end
32
31
  end
33
32
 
34
- def parse(html: nil, bearer_token: nil, server: DEFAULT_SERVER, separator_regex: DEFAULT_SEPARATOR_REGEX, rejected_css: DEFAULT_REJECTED_CSS, broken_css: DEFAULT_BROKEN_CSS, iteration: DEFAULT_MAX_ITERATION)
33
+ def parse(html: nil, bearer_token: nil, server: DEFAULT_SERVER, separator_regex: DEFAULT_SEPARATOR_REGEX, rejected_css: DEFAULT_REJECTED_CSS, broken_css: DEFAULT_BROKEN_CSS, iteration: DEFAULT_MAX_ITERATION, debug: false, no_cache: false)
35
34
  doc = Nokolexbor::HTML(html)
36
35
 
37
36
  # Rejecting title, buttons, and label
@@ -47,12 +46,20 @@ module GoogleLocalResultsAiParser
47
46
  cleaned_text = split_text.map(&:strip).reject(&:empty?).flatten
48
47
 
49
48
  # Making parallel requests to server for classification
50
- results = parallel_post_requests(server, bearer_token, cleaned_text)
49
+ time_start = Time.now
50
+ results = parallel_post_requests(server, bearer_token, cleaned_text, no_cache)
51
+ time_end = Time.now
51
52
 
52
53
  # After-fix and sorting of results
53
54
  results = sort_results(results, extracted_text, unsplit_text, iteration, doc)
54
55
  final_results = transform_hash(results, unsplit_text)
55
- final_results
56
+
57
+ unless debug
58
+ final_results # Default output
59
+ else
60
+ time_taken = time_end - time_start # Time taken to make requests for debugging purposes
61
+ return final_results, time_taken
62
+ end
56
63
  end
57
64
 
58
65
  def transform_hash(results, unsplit_text)
@@ -75,6 +82,17 @@ module GoogleLocalResultsAiParser
75
82
  end
76
83
 
77
84
  def sort_results(results, extracted_text, unsplit_text, iteration, doc)
85
+ # Some endpoints return an array of hashes, whereas others
86
+ # return a wrapped version of it. The Free Inference API
87
+ # should be taken as the reference, since most people will
88
+ # prototype there.
89
+ results.map! do |item|
90
+ if item[:result][0].is_a?(Hash)
91
+ item[:result] = [item[:result]]
92
+ end
93
+ item
94
+ end
95
+
78
96
  # Make at most 2 iterations for after-corrections
79
97
  (0..iteration).each do |i|
80
98
  begin
@@ -540,18 +558,23 @@ module GoogleLocalResultsAiParser
540
558
 
541
559
  private
542
560
 
543
- def parallel_post_requests(server, bearer_token, inputs)
561
+ def parallel_post_requests(server, bearer_token, inputs, no_cache)
544
562
  response_bodies = Parallel.map(inputs, in_threads: inputs.size) do |input|
545
- post_request(server, bearer_token, input)
563
+ post_request(server, bearer_token, input, no_cache)
546
564
  end
547
565
 
548
566
  response_bodies
549
567
  end
550
568
 
551
- def post_request(server, bearer_token, input)
569
+ def post_request(server, bearer_token, input, no_cache)
552
570
  url = URI.parse(server)
553
- headers = { 'Authorization' => "Bearer #{bearer_token}", 'Content-Type' => 'application/json' }
554
- body = { inputs: input }.to_json
571
+ headers = unless no_cache
572
+ { 'Authorization' => "Bearer #{bearer_token}", 'Content-Type' => 'application/json' }
573
+ else
574
+ { 'Authorization' => "Bearer #{bearer_token}", 'Content-Type' => 'application/json', 'Cache-Control' => 'no-cache' } # To benchmark initial load of the model
575
+ end
576
+
577
+ body = { inputs: input, parameters: {top_k: 11}}.to_json # 11 represents the number of labels the model has
555
578
 
556
579
  response = HTTP.headers(headers).post(url, body: body)
557
580
  response_body = JSON.parse(response.body)
metadata CHANGED
@@ -1,15 +1,77 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: google-local-results-ai-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.3
4
+ version: 0.2.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Emirhan Akdeniz
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-06-21 00:00:00.000000000 Z
12
- dependencies: []
11
+ date: 2023-07-14 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokolexbor
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: http
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: parallel
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '1.20'
48
+ - - ">="
49
+ - !ruby/object:Gem::Version
50
+ version: 1.20.1
51
+ type: :runtime
52
+ prerelease: false
53
+ version_requirements: !ruby/object:Gem::Requirement
54
+ requirements:
55
+ - - "~>"
56
+ - !ruby/object:Gem::Version
57
+ version: '1.20'
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: 1.20.1
61
+ - !ruby/object:Gem::Dependency
62
+ name: json
63
+ requirement: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - ">="
66
+ - !ruby/object:Gem::Version
67
+ version: '0'
68
+ type: :runtime
69
+ prerelease: false
70
+ version_requirements: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - ">="
73
+ - !ruby/object:Gem::Version
74
+ version: '0'
13
75
  description: A gem to be used with serpapi/bert-base-local-results model to predict
14
76
  different parts of Google Local Listings. This gem uses BERT model at https://huggingface.co/serpapi/bert-base-local-results
15
77
  in the background. For serving private servers, head to https://github.com/serpapi/google-local-results-ai-server