scraper_utils 0.7.0 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: da26385a1d788bc9ad9d725f0eaefe233d4b70a8bf9aeab0af3168041adc0bc2
4
- data.tar.gz: '02892db893cc706ec67845bde0a05d80b89cdfc2d48759b2e66574e6f87b0031'
3
+ metadata.gz: 4293294d99565b0ee4adc097e9d31619868477e6efa8255f3cd5ec02dc8b275b
4
+ data.tar.gz: e78a08bced34a1f55b8d6e869158887da33f1d6b9803493c7d836402d64aa4ef
5
5
  SHA512:
6
- metadata.gz: b8101b0b0d2ed1d775de54f0e8bac5a1a22ca6f540cea2752de76218a4915c325ec7569ac76a719bf540474f54123cb32f700635160cdbabdcc68679ac33c2e4
7
- data.tar.gz: ae1e5d72f45b077f0525e62dc0399f9bd6f519d52222518e95f57f8f45e03575f3b8b909fb02b030a27b33bfccaefec74bb143d9d13ff1bcfe094ceaa8e369d3
6
+ metadata.gz: ea00598b58e69cf5d911b62148d5d38e13ef017dd5a22159839f3b4437bf28c7c2b17c4832e3869688970037048f69fb543f9645922d6b92888a5fc68d88fc33
7
+ data.tar.gz: 57500daacbec8602c9288bf27bd7ac5f2ff407f621f9ac2c9e114a85fbbb0c555ebebfdc62782b814616b9b46dad65bd5a665b24bb1c9e6774625469bb290485
data/CHANGELOG.md CHANGED
@@ -1,5 +1,10 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.7.1 - 2025-04-15
4
+
5
+ * Accept mixed case suburb names after a comma as well as uppercase suburb names as geocodable
6
+ * Accept more street type abbreviations and check they are on word boundaries
7
+
3
8
  ## 0.7.0 - 2025-04-15
4
9
 
5
10
  * Added Spec helpers and associated doc: `docs/enhancing_specs.md`
@@ -6,11 +6,34 @@ module ScraperUtils
6
6
  # Methods to support specs
7
7
  module SpecSupport
8
8
  AUSTRALIAN_STATES = %w[ACT NSW NT QLD SA TAS VIC WA].freeze
9
- COMMON_STREET_TYPES =
10
- %w[
11
- Avenue Ave Boulevard Court Crt Circle Chase Circuit Close Crescent
12
- Drive Drv Lane Loop Parkway Place Parade Road Rd Street St Square Terrace Way
13
- ].freeze
9
+ STREET_TYPE_PATTERNS = [
10
+ /\bAv(e(nue)?)?\b/i,
11
+ /\bB(oulevard|lvd)\b/i,
12
+ /\b(Circuit|Cct)\b/i,
13
+ /\bCl(ose)?\b/i,
14
+ /\bC(our|r)t\b/i,
15
+ /\bCircle\b/i,
16
+ /\bChase\b/i,
17
+ /\bCr(escent)?\b/i,
18
+ /\bDr((ive)?|v)\b/i,
19
+ /\bEnt(rance)?\b/i,
20
+ /\bGr(ove)?\b/i,
21
+ /\bH(ighwa|w)y\b/i,
22
+ /\bLane\b/i,
23
+ /\bLoop\b/i,
24
+ /\bParkway\b/i,
25
+ /\bPl(ace)?\b/i,
26
+ /\bPriv(ate)?\b/i,
27
+ /\bParade\b/i,
28
+ /\bR(oa)?d\b/i,
29
+ /\bRise\b/i,
30
+ /\bSt(reet)?\b/i,
31
+ /\bSquare\b/i,
32
+ /\bTerrace\b/i,
33
+ /\bWay\b/i
34
+ ].freeze
35
+
36
+
14
37
  AUSTRALIAN_POSTCODES = /\b\d{4}\b/.freeze
15
38
 
16
39
  # Check if an address is likely to be geocodable by analyzing its format.
@@ -25,11 +48,12 @@ module ScraperUtils
25
48
  has_state = AUSTRALIAN_STATES.any? { |state| check_address.end_with?(" #{state}") || check_address.include?(" #{state} ") }
26
49
  has_postcode = address.match?(AUSTRALIAN_POSTCODES)
27
50
 
28
- has_street_type = COMMON_STREET_TYPES.any? { |type| check_address.include?(" #{type}") || check_address.include?(" #{type.upcase}") }
51
+ # Using the pre-compiled patterns
52
+ has_street_type = STREET_TYPE_PATTERNS.any? { |pattern| check_address.match?(pattern) }
29
53
 
30
54
  has_unit_or_lot = address.match?(/\b(Unit|Lot:?)\s+\d+/i)
31
55
 
32
- has_suburb_stats = check_address.match?(/\b[A-Z]{2,}(\s+[A-Z]+)*,?\s+(#{AUSTRALIAN_STATES.join('|')})\b/)
56
+ has_suburb_stats = check_address.match?(/(\b[A-Z]{2,}(\s+[A-Z]+)*,?|,\s+[A-Z][A-Za-z ]+)\s+(#{AUSTRALIAN_STATES.join('|')})\b/)
33
57
 
34
58
  if ENV["DEBUG"]
35
59
  missing = []
@@ -38,7 +62,7 @@ module ScraperUtils
38
62
  end
39
63
  missing << "state" unless has_state
40
64
  missing << "postcode" unless has_postcode
41
- missing << "#{ignore_case ? '' : 'uppercase '}suburb state" unless has_suburb_stats
65
+ missing << "suburb state" unless has_suburb_stats
42
66
  puts " address: #{address} is not geocodable, missing #{missing.join(', ')}" if missing.any?
43
67
  end
44
68
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module ScraperUtils
4
- VERSION = "0.7.0"
4
+ VERSION = "0.7.1"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: scraper_utils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.0
4
+ version: 0.7.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ian Heggie
@@ -118,7 +118,7 @@ metadata:
118
118
  allowed_push_host: https://rubygems.org
119
119
  homepage_uri: https://github.com/ianheggie-oaf/scraper_utils
120
120
  source_code_uri: https://github.com/ianheggie-oaf/scraper_utils
121
- documentation_uri: https://rubydoc.info/gems/scraper_utils/0.7.0
121
+ documentation_uri: https://rubydoc.info/gems/scraper_utils/0.7.1
122
122
  changelog_uri: https://github.com/ianheggie-oaf/scraper_utils/blob/main/CHANGELOG.md
123
123
  rubygems_mfa_required: 'true'
124
124
  post_install_message: