mediawiki_table_scraper 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 82745728df615aa53c1e9275ddfabdd1476217f8
4
- data.tar.gz: 9a1115015b99c30cbea93f6947ce0c1fc622eff0
2
+ SHA256:
3
+ metadata.gz: ea524b1fbce282df1f9a05177d7155af511ca8590edbb7ca4bcce4f00c8f2667
4
+ data.tar.gz: d398ddb0aa66b182022792e0c17d87cb7af6cbd2d64b3fdfaf36ebfe484041a6
5
5
  SHA512:
6
- metadata.gz: 82e76b145d542b0b953d3600ededb71776e36941ec01b0d3442c937289d7d9c481a302e3f387d5cc49e028f40061338fc92032f6f8c033d96441b73b8a9b1122
7
- data.tar.gz: 734127c59510a08c4171f6eff361b9b413eeb64775f4e1a3d6debef66fcb7211865a06da72cb7dadd37514ce529f3df67b10d91cf167a10375865836def10dc9
6
+ metadata.gz: 271ad8b61104df171b564b2db3f18b1a3af0f13fc9120e46f8d256e1ef4603558ac754e3a32ac5972dfdce28958ed02f08c952358123c69c33cfd50c3c1578ee
7
+ data.tar.gz: 436fdbd37167105b68a50b8d9910426fddc72e64b3626ec92f7190aaa10da200279f258a8bfae459702cc244d49650435cc745a49af2d2d1306c619f6ea80fdb
checksums.yaml.gz.sig CHANGED
Binary file
@@ -10,40 +10,44 @@ class MediaWikiTableScraper
10
10
 
11
11
  attr_reader :tables
12
12
 
13
+
13
14
  def initialize(url)
14
15
 
15
16
  doc = Nokorexi.new(url).to_doc
16
17
 
17
18
  tables = doc.root.css('.wikitable')
19
+
18
20
  # Fetch the records as an array of hash records for each table
19
21
 
20
- @tables = tables.map do |table|
22
+ @tables = tables.map.with_index do |table, i|
21
23
 
22
- rows = table.xpath 'tr'
24
+ puts 'i: ' + i.inspect
25
+
26
+ rows = table.xpath 'tbody/tr'
23
27
 
24
28
  # fetch the column names
25
29
  labels = rows.shift.xpath 'th/text()'
26
- names = labels.map {|x| x.downcase.to_sym }
30
+ names = labels.map {|x| x.chomp.downcase.to_sym }
27
31
 
28
- a = rows.map do |row|
32
+ a = rows.map do |row|
29
33
 
30
- row.xpath('td').map do |x|
34
+ row.xpath('td').map do |col|
31
35
 
32
- if x.has_elements? then
36
+ if col.has_elements? then
33
37
 
34
- x.children.map do |c|
38
+ col.children.map do |c|
35
39
  c.is_a?(String) ? c : c.xml.gsub(/<\/?\w+[^>]*>/,'')
36
40
  end.join ' '
37
41
 
38
42
  else
39
- x.text.to_s
43
+ col.text.to_s
40
44
  end
41
45
 
42
46
  end
43
47
 
44
48
  end
45
49
 
46
- a2 = a.map {|rows| names.zip(rows).to_h }
50
+ a.map {|rows| names.zip(rows).to_h }
47
51
 
48
52
  end
49
53
  end
@@ -53,4 +57,3 @@ class MediaWikiTableScraper
53
57
  end
54
58
 
55
59
  end
56
-
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mediawiki_table_scraper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Robertson
@@ -10,28 +10,32 @@ bindir: bin
10
10
  cert_chain:
11
11
  - |
12
12
  -----BEGIN CERTIFICATE-----
13
- MIIDljCCAn6gAwIBAgIBATANBgkqhkiG9w0BAQUFADBIMRIwEAYDVQQDDAlnZW1t
14
- YXN0ZXIxHjAcBgoJkiaJk/IsZAEZFg5qYW1lc3JvYmVydHNvbjESMBAGCgmSJomT
15
- 8ixkARkWAmV1MB4XDTE2MDYxNDE1MTQwOVoXDTE3MDYxNDE1MTQwOVowSDESMBAG
16
- A1UEAwwJZ2VtbWFzdGVyMR4wHAYKCZImiZPyLGQBGRYOamFtZXNyb2JlcnRzb24x
17
- EjAQBgoJkiaJk/IsZAEZFgJldTCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoC
18
- ggEBALc/0KGICil5uPNrZZUisbHWa1tPdaZoRfO03/t5KI1XdnT/1K8bcuw8j5Fb
19
- 2e+2QnDfIHVj3F9hZ4BtA1Z4lF7PxlHtQ4SjhxBLeLmcZtX8ZFIi73PLNItwtOf/
20
- CSI8oGyNxFUKzcbNtRTat8+jRUSs15vUrfxQD/q0RAZTnfamLj1b6ijrL1nqk36T
21
- x4wkEEdyPwou7I8qKJcaYHVrYCrbR5x9ZD77fqsLPNkIoI+SoBQa39+Ph+EjpXHV
22
- qg4bOklAI3Wmn/nQhepuQ9dCQM5zIn85WpwAiK5QeIaqrvnGuBztW5KRBa1diQit
23
- aGKlLl962VtYTp+uALhDBHo/z00CAwEAAaOBijCBhzAJBgNVHRMEAjAAMAsGA1Ud
24
- DwQEAwIEsDAdBgNVHQ4EFgQUnLL92rOFDH49Ig6knBxlNicuha4wJgYDVR0RBB8w
25
- HYEbZ2VtbWFzdGVyQGphbWVzcm9iZXJ0c29uLmV1MCYGA1UdEgQfMB2BG2dlbW1h
26
- c3RlckBqYW1lc3JvYmVydHNvbi5ldTANBgkqhkiG9w0BAQUFAAOCAQEADi7zgG/H
27
- OjC7uEVwwyhHNAt2lW+FdETDGpvJmcB/Z5lB4gbDEozB+hOnTd3oKH8DZeoyEi/Y
28
- 3QmxuNDvAKeNV5bngMxG/5k+zeY4tenyK2K1VVzlV8zgfnd3JWpqbDGRjOYqV+2K
29
- IZxoUnra09diNAo5c74oOaxfS75Tfle2zwjDLHAJat+kxhVmsnMXBSDpE4RDz8E3
30
- aWA3AnmwsgbUbHlniNMwSgSn8JzmCp0vRRIyN1Lw2rmoX1IXsAsAk1t/2RwBn+LC
31
- FWN+XXB3cbVDsx+uRkGyPtPZZbuqezbtXZUMmv+4kceg02I8lDt4PEk8Hrd5ybDi
32
- WVhFo+q8CNPc1Q==
13
+ MIIEXjCCAsagAwIBAgIBATANBgkqhkiG9w0BAQsFADAsMSowKAYDVQQDDCFnZW1t
14
+ YXN0ZXIvREM9amFtZXNyb2JlcnRzb24vREM9ZXUwHhcNMjIwNjA1MTQyODEwWhcN
15
+ MjMwNjA1MTQyODEwWjAsMSowKAYDVQQDDCFnZW1tYXN0ZXIvREM9amFtZXNyb2Jl
16
+ cnRzb24vREM9ZXUwggGiMA0GCSqGSIb3DQEBAQUAA4IBjwAwggGKAoIBgQDB2r9j
17
+ +uxF3HI0NHnPA3OVHXQMTV6vQ77vXc3isvXzdG3pi4h28ERvbpgR02tcXtU1dLSt
18
+ VNfQujRiC6xCuIimIF8xfNLorlA6r3GswLnxYJFL55mROlvj1FVUHYAesBf4sYtQ
19
+ YFKS48MSbjasTYhDfsI1CRDNtq9Eo0CCyCim4Dm3CLwkxRYAZ1QWgp7gvB1iQ4Mj
20
+ EYfdnrdiMNoAsecKBLk9Yw0m8z6tmUQfo9T3k+x8Ea75QdSGxntaK3J0xlf5a/4f
21
+ m1VghX2cQ3HRAD1AVRdqHFX+gsREvjwec556qDh7lYtfHmPJd/GtDSwNqvjy9asF
22
+ BKf8pn4lf6uS7FEt2ZyTTRchp93Mt02iaS6YRS9kMFKgG8YpouW2GznM1zMzv+XZ
23
+ YuQHJoOhuZ9Yw54IdnYN8yvDTYMIPXG7LfH4mSAr+iTt4qIZEswu279BOQtLPq3b
24
+ Hmy4cwTnyQTDajWKoImEJJN28+tgJdU2zCkMqvQSsjY68mHGPyGJXmJN16UCAwEA
25
+ AaOBijCBhzAJBgNVHRMEAjAAMAsGA1UdDwQEAwIEsDAdBgNVHQ4EFgQUpxd/Tg/i
26
+ 83KgE4jyTzmlctVduJAwJgYDVR0RBB8wHYEbZ2VtbWFzdGVyQGphbWVzcm9iZXJ0
27
+ c29uLmV1MCYGA1UdEgQfMB2BG2dlbW1hc3RlckBqYW1lc3JvYmVydHNvbi5ldTAN
28
+ BgkqhkiG9w0BAQsFAAOCAYEAPwPAfdrfaQUmnJaesnMEEUOQ5upX/b+yCFj9SfKK
29
+ Fck0rByt9NBsirEBry7aNLUyjkMj+V93aqBg//oPV5vPzM2xp80NMYM8qeDk8UJB
30
+ cnqZdFqk8FsBPqiDTRjg179pdwdzqVVcdAqEyVd18VkTMFzUKbNJE7tqfabOfCen
31
+ Oo4ni1HNy8Aqu797NELsTpQRb91XqOCH3Hng4YBAs0+4oQypYY9KzD2pcWm/1hOn
32
+ IJIl1VlJNjbqJWv+gbaHQ7wOWl/tBqrf7n+UZMLxNfbclGP9TbOYloAzObYkZuMV
33
+ e5s+i7M0nmQ1Z2KQ6cnRCNJwK5Dkg75a5qWFOHBnagZJcBJlbGRfN2X5NMwdwBZO
34
+ 1c2upWBy8foq/Cetit2N/FEHChGL+C5FoH3p0BaGwN2SRo/oEFW2fNfQ0NBEABUd
35
+ oqvc3p7tQQ8TBXi5/P5xcUMji2AZMfhdsiyaryALvI8GqlkxzqEi4Wg6/9b9IuGt
36
+ mWcJtQkAA9fjV30nkUDK9T5G
33
37
  -----END CERTIFICATE-----
34
- date: 2016-06-14 00:00:00.000000000 Z
38
+ date: 2022-06-05 00:00:00.000000000 Z
35
39
  dependencies:
36
40
  - !ruby/object:Gem::Dependency
37
41
  name: nokorexi
@@ -39,22 +43,22 @@ dependencies:
39
43
  requirements:
40
44
  - - "~>"
41
45
  - !ruby/object:Gem::Version
42
- version: '0.3'
46
+ version: '0.7'
43
47
  - - ">="
44
48
  - !ruby/object:Gem::Version
45
- version: 0.3.1
49
+ version: 0.7.0
46
50
  type: :runtime
47
51
  prerelease: false
48
52
  version_requirements: !ruby/object:Gem::Requirement
49
53
  requirements:
50
54
  - - "~>"
51
55
  - !ruby/object:Gem::Version
52
- version: '0.3'
56
+ version: '0.7'
53
57
  - - ">="
54
58
  - !ruby/object:Gem::Version
55
- version: 0.3.1
59
+ version: 0.7.0
56
60
  description:
57
- email: james@r0bertson.co.uk
61
+ email: digital.robertson@gmail.com
58
62
  executables: []
59
63
  extensions: []
60
64
  extra_rdoc_files: []
@@ -79,8 +83,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
79
83
  - !ruby/object:Gem::Version
80
84
  version: '0'
81
85
  requirements: []
82
- rubyforge_project:
83
- rubygems_version: 2.5.1
86
+ rubygems_version: 3.2.22
84
87
  signing_key:
85
88
  specification_version: 4
86
89
  summary: Scrapes the tables from a MediaWiki page.
metadata.gz.sig CHANGED
Binary file