google-local-results-ai-parser 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/google-local-results-ai-parser.rb +102 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7b078ffd7df8466aa1859da5120725c18b9425ddb331a16a54c913811af0d3b8
|
4
|
+
data.tar.gz: 637c78ed81d02642834e083b1bbcbab1962a09afd218e490e02ffc9ae0232013
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 216e1a13a42b32294a5196f26bd34a05f05cf53cfbf4b50f05b2f7ba93f1f8349a1eff7becd6900f6d96e2c18af910af96be8c92b6a5e467262d672aff4e97d5
|
7
|
+
data.tar.gz: 3dc359ac52dd5e3475bdb4817135f17cc98621a1a59d21078c13090d3e16d6bf10b37520c627fbc0e203f3c6fa51257bb3ae682e65e8b146b5eb8bc1ed187a13
|
@@ -94,7 +94,10 @@ module GoogleLocalResultsAiParser
|
|
94
94
|
results, label_order, duplicates = description_as_hours_confusion(results, label_order, duplicates)
|
95
95
|
results, label_order, duplicates = description_as_type_confusion(results, label_order, duplicates)
|
96
96
|
results, label_order, duplicates = reviews_as_rating_confusion(results, label_order, duplicates)
|
97
|
+
results, label_order, duplicates = reviews_as_price_confusion(results, label_order, duplicates)
|
97
98
|
results, label_order, duplicates = button_text_as_hours_confusion(results, label_order, duplicates)
|
99
|
+
results, label_order, duplicates = button_text_as_address_confusion(results, label_order, duplicates)
|
100
|
+
results, label_order, duplicates = button_text_as_service_options_confusion(results, label_order, duplicates)
|
98
101
|
|
99
102
|
# General clashes
|
100
103
|
line_result = check_if_on_different_lines(results, duplicates, unsplit_text)
|
@@ -240,6 +243,73 @@ module GoogleLocalResultsAiParser
|
|
240
243
|
return results, label_order, duplicates
|
241
244
|
end
|
242
245
|
|
246
|
+
# 104 Ave ... Share
|
247
|
+
# Fixes `Share`
|
248
|
+
# Drops a trailing "Share" button caption that was classified as a second `address`.
#
# Example listing: `104 Ave ... Share`. The button caption is expected to be the
# last member of the duplicate group whose first member is labelled `address`.
#
# @param results [Array<Hash>] entries read via `result[:input]` and
#   `result[:result][0][0]["label"]`
# @param label_order [Array] presumably one label per entry of `results`
#   (same index is removed from both) -- confirm against caller
# @param duplicates [Array<Array<Integer>>] groups of indexes into `results`
# @return [Array] `results, label_order, duplicates`, all mutated in place
def button_text_as_address_confusion(results, label_order, duplicates)
  known_errors = ["Share"]
  return results, label_order, duplicates unless results.any? { |result| known_errors.include?(result[:input]) }

  address_duplicate = duplicates.find do |duplicate|
    results[duplicate[0]][:result][0][0]["label"] == "address"
  end
  # The button text may be present without any `address` duplicate group;
  # there is nothing to untangle in that case.
  return results, label_order, duplicates unless address_duplicate

  # Only remove the last member of the group when it really is the known
  # button text, so a legitimate second address is never thrown away.
  button_index = address_duplicate[-1]
  button = results[button_index]
  return results, label_order, duplicates unless button && known_errors.include?(button[:input])

  # Delete the known button text directly
  results.delete_at(button_index)

  # Rearranging `label_order`
  label_order.delete_at(button_index)

  # Rearranging duplicates: a group with fewer than two members is no longer a clash
  address_duplicate.delete(button_index)
  duplicates.delete(address_duplicate) if address_duplicate.size < 2

  # NOTE(review): indexes larger than `button_index` that remain in `duplicates`
  # are not shifted after the deletion -- confirm whether later steps expect that.
  return results, label_order, duplicates
end
|
275
|
+
|
276
|
+
# Order pickup
|
277
|
+
# Fixes `Order pickup`
|
278
|
+
# Drops every "Order pickup" button caption from the parsed listing.
#
# The caption is removed from `results`, from `label_order`, and from every
# duplicate group that references it. The duplicate group whose first member is
# labelled `service options` is discarded when fewer than two members remain.
#
# @param results [Array<Hash>] entries read via `result[:input]` and
#   `result[:result][0][0]["label"]`
# @param label_order [Array] presumably one label per entry of `results`
#   (same indexes are removed from both) -- confirm against caller
# @param duplicates [Array<Array<Integer>>] groups of indexes into `results`
# @return [Array] `results, label_order, duplicates`, all mutated in place
def button_text_as_service_options_confusion(results, label_order, duplicates)
  known_errors = ["Order pickup"]

  # Collect positions directly; looking entries up by value would return the
  # first *equal* entry and report the same position twice for identical captions.
  button_indexes = results.each_index.select { |index| known_errors.include?(results[index][:input]) }
  return results, label_order, duplicates if button_indexes.empty?

  # Must be located before anything is deleted, while indexes still line up.
  service_options_duplicate = duplicates.find do |duplicate|
    results[duplicate[0]][:result][0][0]["label"] == "service options"
  end

  # Delete the known button text directly, highest index first so the
  # positions still to be removed are not shifted by earlier deletions.
  button_indexes.reverse_each do |index|
    results.delete_at(index)
    # Rearranging `label_order`
    label_order.delete_at(index)
  end

  # Rearranging duplicates
  duplicates.each do |duplicate|
    duplicate.reject! { |index| button_indexes.include?(index) }
  end

  # A group with fewer than two members is no longer a clash
  if service_options_duplicate && service_options_duplicate.size < 2
    duplicates.delete(service_options_duplicate)
  end

  # NOTE(review): indexes that remain in `duplicates` are not shifted after the
  # deletions -- confirm whether later steps expect that.
  return results, label_order, duplicates
end
|
312
|
+
|
243
313
|
# 3.4 .. (1.4K)
|
244
314
|
# Fixes `(1.4K)`
|
245
315
|
def reviews_as_rating_confusion(results, label_order, duplicates)
|
@@ -272,6 +342,38 @@ module GoogleLocalResultsAiParser
|
|
272
342
|
return results, label_order, duplicates
|
273
343
|
end
|
274
344
|
|
345
|
+
# (1.6K) .. $
|
346
|
+
# Fixes `(1.6K)`
|
347
|
+
# Relabels a review count such as `(1.6K)` that was classified as `price`.
#
# Example listing: `(1.6K) .. $`. The review count is the first member of the
# duplicate group whose first member is labelled `price`; it is the entry whose
# text is tested against the pattern, so it is also the entry that gets relabelled.
#
# @param results [Array<Hash>] entries read via `result[:input]` and
#   `result[:result][0]` (a list of `{"label" => ..., "score" => ...}` hashes)
# @param label_order [Array] label per index of `results`; the fixed entry becomes "reviews"
# @param duplicates [Array<Array<Integer>>] groups of indexes into `results`
# @return [Array] `results, label_order, duplicates`, all mutated in place
def reviews_as_price_confusion(results, label_order, duplicates)
  price_duplicate = duplicates.find do |duplicate|
    results[duplicate[0]][:result][0][0]["label"] == "price"
  end
  return results, label_order, duplicates unless price_duplicate

  reviews_position = price_duplicate[0]
  return results, label_order, duplicates unless results[reviews_position][:input][/\(\d+\.\d+\w\)/]

  # Zero out the `price`, and put it to last position
  scores = results[reviews_position][:result][0]
  # The old `reviews` score is dropped when present; slot 0 holds `price` here,
  # so the position found can never be 0.
  stale_reviews_index = scores.index { |hash| hash["label"] == "reviews" }
  scores.delete_at(stale_reviews_index) if stale_reviews_index
  scores[0] = {"label" => "reviews", "score" => 1.0}
  scores << {"label" => "price", "score" => 0.0}

  # Rearranging `label_order`
  label_order[reviews_position] = "reviews"

  # Rearranging duplicates: the relabelled entry no longer clashes on `price`,
  # and a group with fewer than two members is no longer a clash at all
  price_duplicate.delete(reviews_position)
  duplicates.delete(price_duplicate) if price_duplicate.size < 2

  return results, label_order, duplicates
end
|
376
|
+
|
275
377
|
# Coffee shop ... Iconic Seattle-based coffeehouse chain
|
276
378
|
# Fixes `Iconic Seattle-based coffeehouse chain`
|
277
379
|
def description_as_type_confusion(results, label_order, duplicates)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: google-local-results-ai-parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Emirhan Akdeniz
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-06-
|
11
|
+
date: 2023-06-20 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: A gem to be used with serpapi/bert-base-local-results model to predict
|
14
14
|
different parts of Google Local Listings. This gem uses BERT model at https://huggingface.co/serpapi/bert-base-local-results
|