mediawiki_table_scraper 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- checksums.yaml.gz.sig +0 -0
- data/lib/mediawiki_table_scraper.rb +13 -10
- data.tar.gz.sig +0 -0
- metadata +32 -29
- metadata.gz.sig +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: ea524b1fbce282df1f9a05177d7155af511ca8590edbb7ca4bcce4f00c8f2667
|
4
|
+
data.tar.gz: d398ddb0aa66b182022792e0c17d87cb7af6cbd2d64b3fdfaf36ebfe484041a6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 271ad8b61104df171b564b2db3f18b1a3af0f13fc9120e46f8d256e1ef4603558ac754e3a32ac5972dfdce28958ed02f08c952358123c69c33cfd50c3c1578ee
|
7
|
+
data.tar.gz: 436fdbd37167105b68a50b8d9910426fddc72e64b3626ec92f7190aaa10da200279f258a8bfae459702cc244d49650435cc745a49af2d2d1306c619f6ea80fdb
|
checksums.yaml.gz.sig
CHANGED
Binary file
|
@@ -10,40 +10,44 @@ class MediaWikiTableScraper
|
|
10
10
|
|
11
11
|
attr_reader :tables
|
12
12
|
|
13
|
+
|
13
14
|
def initialize(url)
|
14
15
|
|
15
16
|
doc = Nokorexi.new(url).to_doc
|
16
17
|
|
17
18
|
tables = doc.root.css('.wikitable')
|
19
|
+
|
18
20
|
# Fetch the records as an array of hash records for each table
|
19
21
|
|
20
|
-
@tables = tables.map do |table|
|
22
|
+
@tables = tables.map.with_index do |table, i|
|
21
23
|
|
22
|
-
|
24
|
+
puts 'i: ' + i.inspect
|
25
|
+
|
26
|
+
rows = table.xpath 'tbody/tr'
|
23
27
|
|
24
28
|
# fetch the column names
|
25
29
|
labels = rows.shift.xpath 'th/text()'
|
26
|
-
names = labels.map {|x| x.downcase.to_sym }
|
30
|
+
names = labels.map {|x| x.chomp.downcase.to_sym }
|
27
31
|
|
28
|
-
a = rows.map do |row|
|
32
|
+
a = rows.map do |row|
|
29
33
|
|
30
|
-
row.xpath('td').map do |
|
34
|
+
row.xpath('td').map do |col|
|
31
35
|
|
32
|
-
if
|
36
|
+
if col.has_elements? then
|
33
37
|
|
34
|
-
|
38
|
+
col.children.map do |c|
|
35
39
|
c.is_a?(String) ? c : c.xml.gsub(/<\/?\w+[^>]*>/,'')
|
36
40
|
end.join ' '
|
37
41
|
|
38
42
|
else
|
39
|
-
|
43
|
+
col.text.to_s
|
40
44
|
end
|
41
45
|
|
42
46
|
end
|
43
47
|
|
44
48
|
end
|
45
49
|
|
46
|
-
|
50
|
+
a.map {|rows| names.zip(rows).to_h }
|
47
51
|
|
48
52
|
end
|
49
53
|
end
|
@@ -53,4 +57,3 @@ class MediaWikiTableScraper
|
|
53
57
|
end
|
54
58
|
|
55
59
|
end
|
56
|
-
|
data.tar.gz.sig
CHANGED
Binary file
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mediawiki_table_scraper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- James Robertson
|
@@ -10,28 +10,32 @@ bindir: bin
|
|
10
10
|
cert_chain:
|
11
11
|
- |
|
12
12
|
-----BEGIN CERTIFICATE-----
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
13
|
+
MIIEXjCCAsagAwIBAgIBATANBgkqhkiG9w0BAQsFADAsMSowKAYDVQQDDCFnZW1t
|
14
|
+
YXN0ZXIvREM9amFtZXNyb2JlcnRzb24vREM9ZXUwHhcNMjIwNjA1MTQyODEwWhcN
|
15
|
+
MjMwNjA1MTQyODEwWjAsMSowKAYDVQQDDCFnZW1tYXN0ZXIvREM9amFtZXNyb2Jl
|
16
|
+
cnRzb24vREM9ZXUwggGiMA0GCSqGSIb3DQEBAQUAA4IBjwAwggGKAoIBgQDB2r9j
|
17
|
+
+uxF3HI0NHnPA3OVHXQMTV6vQ77vXc3isvXzdG3pi4h28ERvbpgR02tcXtU1dLSt
|
18
|
+
VNfQujRiC6xCuIimIF8xfNLorlA6r3GswLnxYJFL55mROlvj1FVUHYAesBf4sYtQ
|
19
|
+
YFKS48MSbjasTYhDfsI1CRDNtq9Eo0CCyCim4Dm3CLwkxRYAZ1QWgp7gvB1iQ4Mj
|
20
|
+
EYfdnrdiMNoAsecKBLk9Yw0m8z6tmUQfo9T3k+x8Ea75QdSGxntaK3J0xlf5a/4f
|
21
|
+
m1VghX2cQ3HRAD1AVRdqHFX+gsREvjwec556qDh7lYtfHmPJd/GtDSwNqvjy9asF
|
22
|
+
BKf8pn4lf6uS7FEt2ZyTTRchp93Mt02iaS6YRS9kMFKgG8YpouW2GznM1zMzv+XZ
|
23
|
+
YuQHJoOhuZ9Yw54IdnYN8yvDTYMIPXG7LfH4mSAr+iTt4qIZEswu279BOQtLPq3b
|
24
|
+
Hmy4cwTnyQTDajWKoImEJJN28+tgJdU2zCkMqvQSsjY68mHGPyGJXmJN16UCAwEA
|
25
|
+
AaOBijCBhzAJBgNVHRMEAjAAMAsGA1UdDwQEAwIEsDAdBgNVHQ4EFgQUpxd/Tg/i
|
26
|
+
83KgE4jyTzmlctVduJAwJgYDVR0RBB8wHYEbZ2VtbWFzdGVyQGphbWVzcm9iZXJ0
|
27
|
+
c29uLmV1MCYGA1UdEgQfMB2BG2dlbW1hc3RlckBqYW1lc3JvYmVydHNvbi5ldTAN
|
28
|
+
BgkqhkiG9w0BAQsFAAOCAYEAPwPAfdrfaQUmnJaesnMEEUOQ5upX/b+yCFj9SfKK
|
29
|
+
Fck0rByt9NBsirEBry7aNLUyjkMj+V93aqBg//oPV5vPzM2xp80NMYM8qeDk8UJB
|
30
|
+
cnqZdFqk8FsBPqiDTRjg179pdwdzqVVcdAqEyVd18VkTMFzUKbNJE7tqfabOfCen
|
31
|
+
Oo4ni1HNy8Aqu797NELsTpQRb91XqOCH3Hng4YBAs0+4oQypYY9KzD2pcWm/1hOn
|
32
|
+
IJIl1VlJNjbqJWv+gbaHQ7wOWl/tBqrf7n+UZMLxNfbclGP9TbOYloAzObYkZuMV
|
33
|
+
e5s+i7M0nmQ1Z2KQ6cnRCNJwK5Dkg75a5qWFOHBnagZJcBJlbGRfN2X5NMwdwBZO
|
34
|
+
1c2upWBy8foq/Cetit2N/FEHChGL+C5FoH3p0BaGwN2SRo/oEFW2fNfQ0NBEABUd
|
35
|
+
oqvc3p7tQQ8TBXi5/P5xcUMji2AZMfhdsiyaryALvI8GqlkxzqEi4Wg6/9b9IuGt
|
36
|
+
mWcJtQkAA9fjV30nkUDK9T5G
|
33
37
|
-----END CERTIFICATE-----
|
34
|
-
date:
|
38
|
+
date: 2022-06-05 00:00:00.000000000 Z
|
35
39
|
dependencies:
|
36
40
|
- !ruby/object:Gem::Dependency
|
37
41
|
name: nokorexi
|
@@ -39,22 +43,22 @@ dependencies:
|
|
39
43
|
requirements:
|
40
44
|
- - "~>"
|
41
45
|
- !ruby/object:Gem::Version
|
42
|
-
version: '0.
|
46
|
+
version: '0.7'
|
43
47
|
- - ">="
|
44
48
|
- !ruby/object:Gem::Version
|
45
|
-
version: 0.
|
49
|
+
version: 0.7.0
|
46
50
|
type: :runtime
|
47
51
|
prerelease: false
|
48
52
|
version_requirements: !ruby/object:Gem::Requirement
|
49
53
|
requirements:
|
50
54
|
- - "~>"
|
51
55
|
- !ruby/object:Gem::Version
|
52
|
-
version: '0.
|
56
|
+
version: '0.7'
|
53
57
|
- - ">="
|
54
58
|
- !ruby/object:Gem::Version
|
55
|
-
version: 0.
|
59
|
+
version: 0.7.0
|
56
60
|
description:
|
57
|
-
email:
|
61
|
+
email: digital.robertson@gmail.com
|
58
62
|
executables: []
|
59
63
|
extensions: []
|
60
64
|
extra_rdoc_files: []
|
@@ -79,8 +83,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
79
83
|
- !ruby/object:Gem::Version
|
80
84
|
version: '0'
|
81
85
|
requirements: []
|
82
|
-
|
83
|
-
rubygems_version: 2.5.1
|
86
|
+
rubygems_version: 3.2.22
|
84
87
|
signing_key:
|
85
88
|
specification_version: 4
|
86
89
|
summary: Scrapes the tables from a MediaWiki page.
|
metadata.gz.sig
CHANGED
Binary file
|