mediawiki_table_scraper 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 82745728df615aa53c1e9275ddfabdd1476217f8
4
- data.tar.gz: 9a1115015b99c30cbea93f6947ce0c1fc622eff0
2
+ SHA256:
3
+ metadata.gz: ea524b1fbce282df1f9a05177d7155af511ca8590edbb7ca4bcce4f00c8f2667
4
+ data.tar.gz: d398ddb0aa66b182022792e0c17d87cb7af6cbd2d64b3fdfaf36ebfe484041a6
5
5
  SHA512:
6
- metadata.gz: 82e76b145d542b0b953d3600ededb71776e36941ec01b0d3442c937289d7d9c481a302e3f387d5cc49e028f40061338fc92032f6f8c033d96441b73b8a9b1122
7
- data.tar.gz: 734127c59510a08c4171f6eff361b9b413eeb64775f4e1a3d6debef66fcb7211865a06da72cb7dadd37514ce529f3df67b10d91cf167a10375865836def10dc9
6
+ metadata.gz: 271ad8b61104df171b564b2db3f18b1a3af0f13fc9120e46f8d256e1ef4603558ac754e3a32ac5972dfdce28958ed02f08c952358123c69c33cfd50c3c1578ee
7
+ data.tar.gz: 436fdbd37167105b68a50b8d9910426fddc72e64b3626ec92f7190aaa10da200279f258a8bfae459702cc244d49650435cc745a49af2d2d1306c619f6ea80fdb
checksums.yaml.gz.sig CHANGED
Binary file
@@ -10,40 +10,44 @@ class MediaWikiTableScraper
10
10
 
11
11
  attr_reader :tables
12
12
 
13
+
13
14
  def initialize(url)
14
15
 
15
16
  doc = Nokorexi.new(url).to_doc
16
17
 
17
18
  tables = doc.root.css('.wikitable')
19
+
18
20
  # Fetch the records as an array of hash records for each table
19
21
 
20
- @tables = tables.map do |table|
22
+ @tables = tables.map.with_index do |table, i|
21
23
 
22
- rows = table.xpath 'tr'
24
+ puts 'i: ' + i.inspect
25
+
26
+ rows = table.xpath 'tbody/tr'
23
27
 
24
28
  # fetch the column names
25
29
  labels = rows.shift.xpath 'th/text()'
26
- names = labels.map {|x| x.downcase.to_sym }
30
+ names = labels.map {|x| x.chomp.downcase.to_sym }
27
31
 
28
- a = rows.map do |row|
32
+ a = rows.map do |row|
29
33
 
30
- row.xpath('td').map do |x|
34
+ row.xpath('td').map do |col|
31
35
 
32
- if x.has_elements? then
36
+ if col.has_elements? then
33
37
 
34
- x.children.map do |c|
38
+ col.children.map do |c|
35
39
  c.is_a?(String) ? c : c.xml.gsub(/<\/?\w+[^>]*>/,'')
36
40
  end.join ' '
37
41
 
38
42
  else
39
- x.text.to_s
43
+ col.text.to_s
40
44
  end
41
45
 
42
46
  end
43
47
 
44
48
  end
45
49
 
46
- a2 = a.map {|rows| names.zip(rows).to_h }
50
+ a.map {|rows| names.zip(rows).to_h }
47
51
 
48
52
  end
49
53
  end
@@ -53,4 +57,3 @@ class MediaWikiTableScraper
53
57
  end
54
58
 
55
59
  end
56
-
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mediawiki_table_scraper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Robertson
@@ -10,28 +10,32 @@ bindir: bin
10
10
  cert_chain:
11
11
  - |
12
12
  -----BEGIN CERTIFICATE-----
13
- MIIDljCCAn6gAwIBAgIBATANBgkqhkiG9w0BAQUFADBIMRIwEAYDVQQDDAlnZW1t
14
- YXN0ZXIxHjAcBgoJkiaJk/IsZAEZFg5qYW1lc3JvYmVydHNvbjESMBAGCgmSJomT
15
- 8ixkARkWAmV1MB4XDTE2MDYxNDE1MTQwOVoXDTE3MDYxNDE1MTQwOVowSDESMBAG
16
- A1UEAwwJZ2VtbWFzdGVyMR4wHAYKCZImiZPyLGQBGRYOamFtZXNyb2JlcnRzb24x
17
- EjAQBgoJkiaJk/IsZAEZFgJldTCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoC
18
- ggEBALc/0KGICil5uPNrZZUisbHWa1tPdaZoRfO03/t5KI1XdnT/1K8bcuw8j5Fb
19
- 2e+2QnDfIHVj3F9hZ4BtA1Z4lF7PxlHtQ4SjhxBLeLmcZtX8ZFIi73PLNItwtOf/
20
- CSI8oGyNxFUKzcbNtRTat8+jRUSs15vUrfxQD/q0RAZTnfamLj1b6ijrL1nqk36T
21
- x4wkEEdyPwou7I8qKJcaYHVrYCrbR5x9ZD77fqsLPNkIoI+SoBQa39+Ph+EjpXHV
22
- qg4bOklAI3Wmn/nQhepuQ9dCQM5zIn85WpwAiK5QeIaqrvnGuBztW5KRBa1diQit
23
- aGKlLl962VtYTp+uALhDBHo/z00CAwEAAaOBijCBhzAJBgNVHRMEAjAAMAsGA1Ud
24
- DwQEAwIEsDAdBgNVHQ4EFgQUnLL92rOFDH49Ig6knBxlNicuha4wJgYDVR0RBB8w
25
- HYEbZ2VtbWFzdGVyQGphbWVzcm9iZXJ0c29uLmV1MCYGA1UdEgQfMB2BG2dlbW1h
26
- c3RlckBqYW1lc3JvYmVydHNvbi5ldTANBgkqhkiG9w0BAQUFAAOCAQEADi7zgG/H
27
- OjC7uEVwwyhHNAt2lW+FdETDGpvJmcB/Z5lB4gbDEozB+hOnTd3oKH8DZeoyEi/Y
28
- 3QmxuNDvAKeNV5bngMxG/5k+zeY4tenyK2K1VVzlV8zgfnd3JWpqbDGRjOYqV+2K
29
- IZxoUnra09diNAo5c74oOaxfS75Tfle2zwjDLHAJat+kxhVmsnMXBSDpE4RDz8E3
30
- aWA3AnmwsgbUbHlniNMwSgSn8JzmCp0vRRIyN1Lw2rmoX1IXsAsAk1t/2RwBn+LC
31
- FWN+XXB3cbVDsx+uRkGyPtPZZbuqezbtXZUMmv+4kceg02I8lDt4PEk8Hrd5ybDi
32
- WVhFo+q8CNPc1Q==
13
+ MIIEXjCCAsagAwIBAgIBATANBgkqhkiG9w0BAQsFADAsMSowKAYDVQQDDCFnZW1t
14
+ YXN0ZXIvREM9amFtZXNyb2JlcnRzb24vREM9ZXUwHhcNMjIwNjA1MTQyODEwWhcN
15
+ MjMwNjA1MTQyODEwWjAsMSowKAYDVQQDDCFnZW1tYXN0ZXIvREM9amFtZXNyb2Jl
16
+ cnRzb24vREM9ZXUwggGiMA0GCSqGSIb3DQEBAQUAA4IBjwAwggGKAoIBgQDB2r9j
17
+ +uxF3HI0NHnPA3OVHXQMTV6vQ77vXc3isvXzdG3pi4h28ERvbpgR02tcXtU1dLSt
18
+ VNfQujRiC6xCuIimIF8xfNLorlA6r3GswLnxYJFL55mROlvj1FVUHYAesBf4sYtQ
19
+ YFKS48MSbjasTYhDfsI1CRDNtq9Eo0CCyCim4Dm3CLwkxRYAZ1QWgp7gvB1iQ4Mj
20
+ EYfdnrdiMNoAsecKBLk9Yw0m8z6tmUQfo9T3k+x8Ea75QdSGxntaK3J0xlf5a/4f
21
+ m1VghX2cQ3HRAD1AVRdqHFX+gsREvjwec556qDh7lYtfHmPJd/GtDSwNqvjy9asF
22
+ BKf8pn4lf6uS7FEt2ZyTTRchp93Mt02iaS6YRS9kMFKgG8YpouW2GznM1zMzv+XZ
23
+ YuQHJoOhuZ9Yw54IdnYN8yvDTYMIPXG7LfH4mSAr+iTt4qIZEswu279BOQtLPq3b
24
+ Hmy4cwTnyQTDajWKoImEJJN28+tgJdU2zCkMqvQSsjY68mHGPyGJXmJN16UCAwEA
25
+ AaOBijCBhzAJBgNVHRMEAjAAMAsGA1UdDwQEAwIEsDAdBgNVHQ4EFgQUpxd/Tg/i
26
+ 83KgE4jyTzmlctVduJAwJgYDVR0RBB8wHYEbZ2VtbWFzdGVyQGphbWVzcm9iZXJ0
27
+ c29uLmV1MCYGA1UdEgQfMB2BG2dlbW1hc3RlckBqYW1lc3JvYmVydHNvbi5ldTAN
28
+ BgkqhkiG9w0BAQsFAAOCAYEAPwPAfdrfaQUmnJaesnMEEUOQ5upX/b+yCFj9SfKK
29
+ Fck0rByt9NBsirEBry7aNLUyjkMj+V93aqBg//oPV5vPzM2xp80NMYM8qeDk8UJB
30
+ cnqZdFqk8FsBPqiDTRjg179pdwdzqVVcdAqEyVd18VkTMFzUKbNJE7tqfabOfCen
31
+ Oo4ni1HNy8Aqu797NELsTpQRb91XqOCH3Hng4YBAs0+4oQypYY9KzD2pcWm/1hOn
32
+ IJIl1VlJNjbqJWv+gbaHQ7wOWl/tBqrf7n+UZMLxNfbclGP9TbOYloAzObYkZuMV
33
+ e5s+i7M0nmQ1Z2KQ6cnRCNJwK5Dkg75a5qWFOHBnagZJcBJlbGRfN2X5NMwdwBZO
34
+ 1c2upWBy8foq/Cetit2N/FEHChGL+C5FoH3p0BaGwN2SRo/oEFW2fNfQ0NBEABUd
35
+ oqvc3p7tQQ8TBXi5/P5xcUMji2AZMfhdsiyaryALvI8GqlkxzqEi4Wg6/9b9IuGt
36
+ mWcJtQkAA9fjV30nkUDK9T5G
33
37
  -----END CERTIFICATE-----
34
- date: 2016-06-14 00:00:00.000000000 Z
38
+ date: 2022-06-05 00:00:00.000000000 Z
35
39
  dependencies:
36
40
  - !ruby/object:Gem::Dependency
37
41
  name: nokorexi
@@ -39,22 +43,22 @@ dependencies:
39
43
  requirements:
40
44
  - - "~>"
41
45
  - !ruby/object:Gem::Version
42
- version: '0.3'
46
+ version: '0.7'
43
47
  - - ">="
44
48
  - !ruby/object:Gem::Version
45
- version: 0.3.1
49
+ version: 0.7.0
46
50
  type: :runtime
47
51
  prerelease: false
48
52
  version_requirements: !ruby/object:Gem::Requirement
49
53
  requirements:
50
54
  - - "~>"
51
55
  - !ruby/object:Gem::Version
52
- version: '0.3'
56
+ version: '0.7'
53
57
  - - ">="
54
58
  - !ruby/object:Gem::Version
55
- version: 0.3.1
59
+ version: 0.7.0
56
60
  description:
57
- email: james@r0bertson.co.uk
61
+ email: digital.robertson@gmail.com
58
62
  executables: []
59
63
  extensions: []
60
64
  extra_rdoc_files: []
@@ -79,8 +83,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
79
83
  - !ruby/object:Gem::Version
80
84
  version: '0'
81
85
  requirements: []
82
- rubyforge_project:
83
- rubygems_version: 2.5.1
86
+ rubygems_version: 3.2.22
84
87
  signing_key:
85
88
  specification_version: 4
86
89
  summary: Scrapes the tables from a MediaWiki page.
metadata.gz.sig CHANGED
Binary file