clasrip 0.1.0 → 0.1.1

This diff shows the changes between two publicly released versions of the package, as they appear in the supported public registries. It is provided for informational purposes only.
Files changed (4)
  1. data/bin/clasrip +25 -29
  2. data/clasrip.gemspec +2 -2
  3. data/lib/clasrip.rb +14 -1
  4. metadata +15 -15
data/bin/clasrip CHANGED
@@ -1,37 +1,33 @@
1
1
  #!/usr/bin/env ruby
2
- $:.push("./lib")
3
2
 
4
- if $0 == __FILE__
5
- require "clasrip"
6
- require "clasrip/sql"
7
- require "date"
3
+ require "clasrip"
4
+ require "clasrip/sql"
5
+ require "date"
8
6
 
9
- if ARGV.size >= 2 and ARGV[0] == "rip"
10
- scraper = Clasrip::Scraper.new(1971, Date.today.year + 1)
11
- sql = Clasrip::SQL.new(ARGV[1])
12
-
13
- last_record = Clasrip::SQL::Classification.last
14
- if last_record != nil
15
- last_record = last_record.attributes
16
- last_record.delete(:id)
17
- last_record[:date_of_classification] = last_record[:date_of_classification].iso8601
18
-
19
- date = Clasrip::SQL::Classification.last.date_of_classification
20
- scraper.set_date(date.year, date.month-1, 1)
21
- #puts "Set date to: #{scraper.get_date}"
7
+ if ARGV.size >= 2 and ARGV[0] == "rip"
8
+ scraper = Clasrip::Scraper.new(1971, Date.today.year + 1)
9
+ sql = Clasrip::SQL.new(ARGV[1])
22
10
 
23
- print "Finding last record (#{last_record[:title]})... "
24
- scraper.each do |record|
25
- break if record == last_record
26
- end
27
- puts "Found!"
28
- end
11
+ last_record = Clasrip::SQL::Classification.last
12
+ if last_record != nil
13
+ last_record = last_record.attributes
14
+ last_record.delete(:id)
15
+ last_record[:date_of_classification] = last_record[:date_of_classification].iso8601
16
+
17
+ date = Clasrip::SQL::Classification.last.date_of_classification
18
+ scraper.set_date(date.year, date.month-1, 1)
29
19
 
30
- date = scraper.get_date
31
- scraper.each do |record|
32
- puts "(#{record[:date_of_classification]}) #{record[:title]} [#{record[:classification]}]"
33
- sql.add_record(record)
34
- end
20
+ print "Finding last record (#{last_record[:title]})... "
21
+ scraper.each do |record|
22
+ break if record == last_record
23
+ end
24
+ puts "Found!"
25
+ end
26
+
27
+ date = scraper.get_date
28
+ scraper.each do |record|
29
+ puts "(#{record[:date_of_classification]}) #{record[:title]} [#{record[:classification]}]"
30
+ sql.add_record(record)
35
31
  end
36
32
  end
37
33
 
data/clasrip.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "clasrip"
8
- s.version = "0.1.0"
8
+ s.version = "0.1.1"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Brendan Molloy"]
12
- s.date = "2012-02-25"
12
+ s.date = "2012-02-26"
13
13
  s.description = "A scraper for classification.gov.au"
14
14
  s.email = "brendan@bbqsrc.net"
15
15
  s.executables = ["clasrip"]
data/lib/clasrip.rb CHANGED
@@ -5,7 +5,7 @@ module Clasrip
5
5
  module Version
6
6
  MAJOR = 0
7
7
  MINOR = 1
8
- PATCH = 0
8
+ PATCH = 1
9
9
  BUILD = nil
10
10
  def self.to_s
11
11
  [MAJOR, MINOR, PATCH, BUILD].compact.join('.')
@@ -111,6 +111,16 @@ module Clasrip
111
111
  end
112
112
  end
113
113
 
114
+ def ensure_correct_encoding(s)
115
+ s.force_encoding("utf-8")
116
+ return s if s.valid_encoding?
117
+
118
+ puts ("Invalid: " + s)
119
+ s.encode!("utf-8", "iso-8859-1")
120
+ raise "Could not enforce UTF-8 encoding: '#{s}'" unless s.valid_encoding?
121
+ s
122
+ end
123
+
114
124
  def new_enum
115
125
  @records = Enumerator.new do |y|
116
126
  @dates[0].each do |first_date|
@@ -133,6 +143,9 @@ module Clasrip
133
143
  parse_table(table).each do |record|
134
144
  form = get_classification(record[:original_url]) or next
135
145
  record.merge!(parse_classification(form))
146
+ record.each_pair do |k,v|
147
+ record[k] = ensure_correct_encoding(v)
148
+ end
136
149
  y << record
137
150
  end
138
151
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: clasrip
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-02-25 00:00:00.000000000 Z
12
+ date: 2012-02-26 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: nokogiri
16
- requirement: &70095153397460 !ruby/object:Gem::Requirement
16
+ requirement: &70149198193640 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: 1.5.0
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *70095153397460
24
+ version_requirements: *70149198193640
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: data_mapper
27
- requirement: &70095153396980 !ruby/object:Gem::Requirement
27
+ requirement: &70149198192960 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: 1.2.0
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *70095153396980
35
+ version_requirements: *70149198192960
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: shoulda
38
- requirement: &70095153396500 !ruby/object:Gem::Requirement
38
+ requirement: &70149198192360 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ! '>='
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: '0'
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *70095153396500
46
+ version_requirements: *70149198192360
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: rdoc
49
- requirement: &70095153395980 !ruby/object:Gem::Requirement
49
+ requirement: &70149198191660 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ~>
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: '3.12'
55
55
  type: :development
56
56
  prerelease: false
57
- version_requirements: *70095153395980
57
+ version_requirements: *70149198191660
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: bundler
60
- requirement: &70095153395480 !ruby/object:Gem::Requirement
60
+ requirement: &70149198190640 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ~>
@@ -65,10 +65,10 @@ dependencies:
65
65
  version: 1.0.0
66
66
  type: :development
67
67
  prerelease: false
68
- version_requirements: *70095153395480
68
+ version_requirements: *70149198190640
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: jeweler
71
- requirement: &70095153394980 !ruby/object:Gem::Requirement
71
+ requirement: &70149198206040 !ruby/object:Gem::Requirement
72
72
  none: false
73
73
  requirements:
74
74
  - - ~>
@@ -76,7 +76,7 @@ dependencies:
76
76
  version: 1.8.3
77
77
  type: :development
78
78
  prerelease: false
79
- version_requirements: *70095153394980
79
+ version_requirements: *70149198206040
80
80
  description: A scraper for classification.gov.au
81
81
  email: brendan@bbqsrc.net
82
82
  executables:
@@ -112,7 +112,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
112
112
  version: '0'
113
113
  segments:
114
114
  - 0
115
- hash: -3829822103400611157
115
+ hash: -1633970238896182400
116
116
  required_rubygems_version: !ruby/object:Gem::Requirement
117
117
  none: false
118
118
  requirements: