clasrip 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4)
  1. data/bin/clasrip +25 -29
  2. data/clasrip.gemspec +2 -2
  3. data/lib/clasrip.rb +14 -1
  4. metadata +15 -15
data/bin/clasrip CHANGED
@@ -1,37 +1,33 @@
1
1
  #!/usr/bin/env ruby
2
- $:.push("./lib")
3
2
 
4
- if $0 == __FILE__
5
- require "clasrip"
6
- require "clasrip/sql"
7
- require "date"
3
+ require "clasrip"
4
+ require "clasrip/sql"
5
+ require "date"
8
6
 
9
- if ARGV.size >= 2 and ARGV[0] == "rip"
10
- scraper = Clasrip::Scraper.new(1971, Date.today.year + 1)
11
- sql = Clasrip::SQL.new(ARGV[1])
12
-
13
- last_record = Clasrip::SQL::Classification.last
14
- if last_record != nil
15
- last_record = last_record.attributes
16
- last_record.delete(:id)
17
- last_record[:date_of_classification] = last_record[:date_of_classification].iso8601
18
-
19
- date = Clasrip::SQL::Classification.last.date_of_classification
20
- scraper.set_date(date.year, date.month-1, 1)
21
- #puts "Set date to: #{scraper.get_date}"
7
+ if ARGV.size >= 2 and ARGV[0] == "rip"
8
+ scraper = Clasrip::Scraper.new(1971, Date.today.year + 1)
9
+ sql = Clasrip::SQL.new(ARGV[1])
22
10
 
23
- print "Finding last record (#{last_record[:title]})... "
24
- scraper.each do |record|
25
- break if record == last_record
26
- end
27
- puts "Found!"
28
- end
11
+ last_record = Clasrip::SQL::Classification.last
12
+ if last_record != nil
13
+ last_record = last_record.attributes
14
+ last_record.delete(:id)
15
+ last_record[:date_of_classification] = last_record[:date_of_classification].iso8601
16
+
17
+ date = Clasrip::SQL::Classification.last.date_of_classification
18
+ scraper.set_date(date.year, date.month-1, 1)
29
19
 
30
- date = scraper.get_date
31
- scraper.each do |record|
32
- puts "(#{record[:date_of_classification]}) #{record[:title]} [#{record[:classification]}]"
33
- sql.add_record(record)
34
- end
20
+ print "Finding last record (#{last_record[:title]})... "
21
+ scraper.each do |record|
22
+ break if record == last_record
23
+ end
24
+ puts "Found!"
25
+ end
26
+
27
+ date = scraper.get_date
28
+ scraper.each do |record|
29
+ puts "(#{record[:date_of_classification]}) #{record[:title]} [#{record[:classification]}]"
30
+ sql.add_record(record)
35
31
  end
36
32
  end
37
33
 
data/clasrip.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "clasrip"
8
- s.version = "0.1.0"
8
+ s.version = "0.1.1"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Brendan Molloy"]
12
- s.date = "2012-02-25"
12
+ s.date = "2012-02-26"
13
13
  s.description = "A scraper for classification.gov.au"
14
14
  s.email = "brendan@bbqsrc.net"
15
15
  s.executables = ["clasrip"]
data/lib/clasrip.rb CHANGED
@@ -5,7 +5,7 @@ module Clasrip
5
5
  module Version
6
6
  MAJOR = 0
7
7
  MINOR = 1
8
- PATCH = 0
8
+ PATCH = 1
9
9
  BUILD = nil
10
10
  def self.to_s
11
11
  [MAJOR, MINOR, PATCH, BUILD].compact.join('.')
@@ -111,6 +111,16 @@ module Clasrip
111
111
  end
112
112
  end
113
113
 
114
+ def ensure_correct_encoding(s)
115
+ s.force_encoding("utf-8")
116
+ return s if s.valid_encoding?
117
+
118
+ puts ("Invalid: " + s)
119
+ s.encode!("utf-8", "iso-8859-1")
120
+ raise "Could not enforce UTF-8 encoding: '#{s}'" unless s.valid_encoding?
121
+ s
122
+ end
123
+
114
124
  def new_enum
115
125
  @records = Enumerator.new do |y|
116
126
  @dates[0].each do |first_date|
@@ -133,6 +143,9 @@ module Clasrip
133
143
  parse_table(table).each do |record|
134
144
  form = get_classification(record[:original_url]) or next
135
145
  record.merge!(parse_classification(form))
146
+ record.each_pair do |k,v|
147
+ record[k] = ensure_correct_encoding(v)
148
+ end
136
149
  y << record
137
150
  end
138
151
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: clasrip
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-02-25 00:00:00.000000000 Z
12
+ date: 2012-02-26 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: nokogiri
16
- requirement: &70095153397460 !ruby/object:Gem::Requirement
16
+ requirement: &70149198193640 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: 1.5.0
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *70095153397460
24
+ version_requirements: *70149198193640
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: data_mapper
27
- requirement: &70095153396980 !ruby/object:Gem::Requirement
27
+ requirement: &70149198192960 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: 1.2.0
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *70095153396980
35
+ version_requirements: *70149198192960
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: shoulda
38
- requirement: &70095153396500 !ruby/object:Gem::Requirement
38
+ requirement: &70149198192360 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ! '>='
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: '0'
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *70095153396500
46
+ version_requirements: *70149198192360
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: rdoc
49
- requirement: &70095153395980 !ruby/object:Gem::Requirement
49
+ requirement: &70149198191660 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ~>
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: '3.12'
55
55
  type: :development
56
56
  prerelease: false
57
- version_requirements: *70095153395980
57
+ version_requirements: *70149198191660
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: bundler
60
- requirement: &70095153395480 !ruby/object:Gem::Requirement
60
+ requirement: &70149198190640 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ~>
@@ -65,10 +65,10 @@ dependencies:
65
65
  version: 1.0.0
66
66
  type: :development
67
67
  prerelease: false
68
- version_requirements: *70095153395480
68
+ version_requirements: *70149198190640
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: jeweler
71
- requirement: &70095153394980 !ruby/object:Gem::Requirement
71
+ requirement: &70149198206040 !ruby/object:Gem::Requirement
72
72
  none: false
73
73
  requirements:
74
74
  - - ~>
@@ -76,7 +76,7 @@ dependencies:
76
76
  version: 1.8.3
77
77
  type: :development
78
78
  prerelease: false
79
- version_requirements: *70095153394980
79
+ version_requirements: *70149198206040
80
80
  description: A scraper for classification.gov.au
81
81
  email: brendan@bbqsrc.net
82
82
  executables:
@@ -112,7 +112,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
112
112
  version: '0'
113
113
  segments:
114
114
  - 0
115
- hash: -3829822103400611157
115
+ hash: -1633970238896182400
116
116
  required_rubygems_version: !ruby/object:Gem::Requirement
117
117
  none: false
118
118
  requirements: