parseline 1.0.2 → 1.0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +140 -0
- data/lib/parseline.rb +106 -0
- data/lib/parseline/parser_line_csv.rb +1 -0
- data/lib/parseline/parser_line_fixed_width.rb +31 -12
- metadata +12 -11
data/README.rdoc
ADDED
@@ -0,0 +1,140 @@
|
|
1
|
+
The purpose of the parseline gem is to help developers load external CSV and fixed-width files.
|
2
|
+
|
3
|
+
===Installation
|
4
|
+
sudo gem sources --add http://gems.github.com
|
5
|
+
sudo gem install shairontoledo-parseline
|
6
|
+
or from rubyforge
|
7
|
+
sudo gem install parseline
|
8
|
+
|
9
|
+
===How to Use
|
10
|
+
A single data mapper
|
11
|
+
|
12
|
+
===With ActiveRecord
|
13
|
+
|
14
|
+
====Using CSV
|
15
|
+
You can load files whose fields are delimited by a character or by a regexp. To demonstrate this, we'll use the following migration/table:
|
16
|
+
|
17
|
+
class CreateProducts < ActiveRecord::Migration
|
18
|
+
def self.up
|
19
|
+
create_table :products do |t|
|
20
|
+
t.integer :code
|
21
|
+
t.string :name
|
22
|
+
t.boolean :in_stock
|
23
|
+
t.date :date
|
24
|
+
t.float :price
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
def self.down
|
29
|
+
drop_table :products
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
and a given file called 'data.csv' with the content below.
|
34
|
+
|
35
|
+
1;PRODUT 1;Y;;11/21/2008;90.00
|
36
|
+
2;PRODUT 2;N;;11/22/2008;341.33
|
37
|
+
3;PRODUT 3;N;;11/01/2008;1.99
|
38
|
+
4;PRODUT 4;Y;;11/15/2008;34.98
|
39
|
+
5;PRODUT 5;N;;11/14/2008;130.44
|
40
|
+
6;PRODUT 6;Y;;11/05/2008;20.11
|
41
|
+
|
42
|
+
The data layout is described as follows:
|
43
|
+
|
44
|
+
product's code;name;if it is in stock;*reserved for future use*;date;price
|
45
|
+
|
46
|
+
As you can see, we need to convert the 'in stock' column to a boolean and convert the date format from MM/DD/YYYY to YYYY-MM-DD.
|
47
|
+
Each parse.field call can take a lambda that receives the raw field value and returns the formatted value.
|
48
|
+
|
49
|
+
so, take a look at the ActiveRecord definition using the module ParseLine::CSV
|
50
|
+
require 'parseline'
|
51
|
+
|
52
|
+
class Product < ActiveRecord::Base
|
53
|
+
extend ParseLine::CSV
|
54
|
+
csv_layout :delimiter => ";" do |parse|
|
55
|
+
parse.field :code
|
56
|
+
parse.field :name
|
57
|
+
parse.ignore_field
|
58
|
+
parse.field :in_stock, lambda {|s| s == 'Y' }
|
59
|
+
parse.field :date , lambda {|d| d.gsub(/(\d{2})\/(\d{2})\/(\d{4})/,'\3-\1-\2') }
|
60
|
+
parse.field :price
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
Each column is mapped to a field, except the column after :name, which is ignored.
|
65
|
+
|
66
|
+
===== Loading external file
|
67
|
+
There are two ways to load data. To load a single line, use the method Product.load_line, which returns an instance of Product:
|
68
|
+
|
69
|
+
data_file=File.readlines("data.csv")
|
70
|
+
@product=Product.load_line data_file.first
|
71
|
+
@product.save #data will go to database
|
72
|
+
|
73
|
+
|
74
|
+
or load all records from a file, which returns an array of Products:
|
75
|
+
|
76
|
+
@products=Product.load_lines "data.csv"
|
77
|
+
|
78
|
+
|
79
|
+
====Using Fixed Width
|
80
|
+
To load fixed-width data, extend the module ParseLine::FixedWidth. It provides the same class methods for loading data, load_line and load_lines. Consider the following fixed-width file:
|
81
|
+
|
82
|
+
000001PRODUT 1 Y 11/21/2008000090.00
|
83
|
+
000002PRODUT 2 N 11/22/2008000341.33
|
84
|
+
000003PRODUT 3 N 11/01/2008000001.99
|
85
|
+
000004PRODUT 4 Y 11/15/2008000034.98
|
86
|
+
000005PRODUT 5 N 11/14/2008000130.44
|
87
|
+
000006PRODUT 6 Y 11/05/2008000020.11
|
88
|
+
|
89
|
+
Let's split those lines into fields and define a model like this:
|
90
|
+
require 'parseline'
|
91
|
+
|
92
|
+
class Product < ActiveRecord::Base
|
93
|
+
extend ParseLine::FixedWidth
|
94
|
+
fixed_width_layout do |parse|
|
95
|
+
parse.field :code , 0..5
|
96
|
+
parse.field :name, 6..21
|
97
|
+
parse.field :in_stock, 22..22, lambda {|s| s == 'Y' }
|
98
|
+
parse.field :date , 24..33, lambda {|d| d.gsub(/(\d{2})\/(\d{2})\/(\d{4})/,'\3-\1-\2') }
|
99
|
+
parse.field :price, 34..42
|
100
|
+
end
|
101
|
+
end
|
102
|
+
|
103
|
+
After that
|
104
|
+
|
105
|
+
@products=Product.load_lines "data.txt"
|
106
|
+
|
107
|
+
===With whatever class
|
108
|
+
|
109
|
+
====Using Fixed Width
|
110
|
+
|
111
|
+
require 'rubygems'
|
112
|
+
require 'parseline'
|
113
|
+
|
114
|
+
class Person
|
115
|
+
|
116
|
+
attr_accessor :id
|
117
|
+
attr_accessor :age
|
118
|
+
|
119
|
+
extend ParseLine::FixedWidth
|
120
|
+
|
121
|
+
fixed_width_layout do |parse|
|
122
|
+
parse.field :id, 0..4
|
123
|
+
parse.field :age, 5..6
|
124
|
+
end
|
125
|
+
end
|
126
|
+
|
127
|
+
After that
|
128
|
+
|
129
|
+
@people=Person.load_lines "data.txt"
|
130
|
+
|
131
|
+
and
|
132
|
+
|
133
|
+
@people.each{|person| p "id=#{person.id} and age=#{person.age}" }
|
134
|
+
|
135
|
+
|
136
|
+
Remember to extend the appropriate ParseLine module (ParseLine::CSV or ParseLine::FixedWidth) in your class.
|
137
|
+
It's easy to do, so enjoy and make money with it!
|
138
|
+
|
139
|
+
|
140
|
+
|
data/lib/parseline.rb
CHANGED
@@ -1,5 +1,111 @@
|
|
1
1
|
#shairon.toledo@gmail.com
|
2
2
|
#http://www.hashcode.eti.br
|
3
|
+
#
|
4
|
+
#The purpose of the parseline gem is to help developers load external CSV and fixed-width files.
|
5
|
+
#
|
6
|
+
#===Installation
|
7
|
+
# sudo gem sources --add http://gems.github.com
|
8
|
+
# sudo gem install parseline
|
9
|
+
#
|
10
|
+
#
|
11
|
+
#===How to Use
|
12
|
+
#As a single data mapper
|
13
|
+
#
|
14
|
+
#====Using CSV
|
15
|
+
#You can load files whose fields are delimited by a character or by a regexp. To demonstrate this, we'll use the following migration/table:
|
16
|
+
#
|
17
|
+
#class CreateProducts < ActiveRecord::Migration
|
18
|
+
# def self.up
|
19
|
+
# create_table :products do |t|
|
20
|
+
# t.integer :code
|
21
|
+
# t.string :name
|
22
|
+
# t.boolean :in_stock
|
23
|
+
# t.date :date
|
24
|
+
# t.float :price
|
25
|
+
# end
|
26
|
+
# end
|
27
|
+
#
|
28
|
+
# def self.down
|
29
|
+
# drop_table :products
|
30
|
+
# end
|
31
|
+
#end
|
32
|
+
#
|
33
|
+
#and a given file called 'data.csv' with the content below.
|
34
|
+
#
|
35
|
+
# 1;PRODUT 1;Y;;11/21/2008;90.00
|
36
|
+
# 2;PRODUT 2;N;;11/22/2008;341.33
|
37
|
+
# 3;PRODUT 3;N;;11/01/2008;1.99
|
38
|
+
# 4;PRODUT 4;Y;;11/15/2008;34.98
|
39
|
+
# 5;PRODUT 5;N;;11/14/2008;130.44
|
40
|
+
# 6;PRODUT 6;Y;;11/05/2008;20.11
|
41
|
+
#
|
42
|
+
#The descriptions of data layout are
|
43
|
+
#
|
44
|
+
# product's code;name;if it is in stock;*reserved for future use*;date;price
|
45
|
+
#
|
46
|
+
#As you can see, we need to convert the 'in stock' column to a boolean and convert the date format from MM/DD/YYYY to YYYY-MM-DD.
|
47
|
+
#For every parse.field call, you receive the field to format using a lambda block.
|
48
|
+
#
|
49
|
+
#so, take a look at the ActiveRecord definition using the module ParseLine::CSV
|
50
|
+
# require 'parseline'
|
51
|
+
#
|
52
|
+
# class Product < ActiveRecord::Base
|
53
|
+
# extend ParseLine::CSV
|
54
|
+
# csv_layout :delimiter => ";" do |parse|
|
55
|
+
# parse.field :code
|
56
|
+
# parse.field :name
|
57
|
+
# parse.ignore_field
|
58
|
+
# parse.field :in_stock, lambda {|s| s == 'Y' }
|
59
|
+
# parse.field :date , lambda {|d| d.gsub(/(\d{2})\/(\d{2})\/(\d{4})/,'\3-\1-\2') }
|
60
|
+
# parse.field :price
|
61
|
+
# end
|
62
|
+
# end
|
63
|
+
#
|
64
|
+
#Each column will be mapped into a field, except the column after :name that is being ignored.
|
65
|
+
#
|
66
|
+
#===== Loading external file
|
67
|
+
#There are two ways to load data. To load a single line, use the method Product.load_line, which returns an instance of Product:
|
68
|
+
#
|
69
|
+
# data_file=File.readlines("data.csv")
|
70
|
+
# @product=Product.load_line data_file.first
|
71
|
+
# @product.save #data will go to database
|
72
|
+
#
|
73
|
+
#
|
74
|
+
#or loading from file with all records returns an array of Products
|
75
|
+
#
|
76
|
+
# @products=Product.load_lines "data.csv"
|
77
|
+
#
|
78
|
+
#
|
79
|
+
#====Using Fixed Width
|
80
|
+
#To load fixed-width data, extend the module ParseLine::FixedWidth. It provides the same class methods for loading data, load_line and load_lines. Consider the following fixed-width file:
|
81
|
+
#
|
82
|
+
# 000001PRODUT 1 Y 11/21/2008000090.00
|
83
|
+
# 000002PRODUT 2 N 11/22/2008000341.33
|
84
|
+
# 000003PRODUT 3 N 11/01/2008000001.99
|
85
|
+
# 000004PRODUT 4 Y 11/15/2008000034.98
|
86
|
+
# 000005PRODUT 5 N 11/14/2008000130.44
|
87
|
+
# 000006PRODUT 6 Y 11/05/2008000020.11
|
88
|
+
#
|
89
|
+
#let's split those lines and define a model like
|
90
|
+
# require 'parseline'
|
91
|
+
#
|
92
|
+
# class Product < ActiveRecord::Base
|
93
|
+
# extend ParseLine::FixedWidth
|
94
|
+
# fixed_width_layout do |parse|
|
95
|
+
# parse.field :code , 0..5
|
96
|
+
# parse.field :name, 6..21
|
97
|
+
# parse.field :in_stock, 22..22, lambda {|s| s == 'Y' }
|
98
|
+
# parse.field :date , 24..33, lambda {|d| d.gsub(/(\d{2})\/(\d{2})\/(\d{4})/,'\3-\1-\2') }
|
99
|
+
# parse.field :price, 34..42
|
100
|
+
# end
|
101
|
+
# end
|
102
|
+
#
|
103
|
+
#After that
|
104
|
+
#
|
105
|
+
# @products=Product.load_lines "data.csv"
|
106
|
+
#
|
107
|
+
#Remember to use the parse module
|
108
|
+
#It's easy to do, so enjoy and make money with it!
|
3
109
|
module ParseLine
|
4
110
|
require 'parseline/parser_line_csv'
|
5
111
|
require 'parseline/parser_line_fixed_width'
|
@@ -1,13 +1,13 @@
|
|
1
1
|
#shairon.toledo@gmail.com
|
2
2
|
#http://www.hashcode.eti.br
|
3
3
|
module ParseLine::FixedWidth
|
4
|
-
|
4
|
+
|
5
5
|
self.class_eval do
|
6
|
-
|
6
|
+
|
7
7
|
def fixed_width_layout(&block)
|
8
8
|
yield self
|
9
9
|
end
|
10
|
-
|
10
|
+
|
11
11
|
def parse_field(field,range,block=nil)
|
12
12
|
self.send(:class_variable_set,:@@parse_values,[]) unless self.class_variable_defined?(:@@parse_values)
|
13
13
|
self.send(:class_variable_get,:@@parse_values) << [field,range,block]
|
@@ -16,12 +16,34 @@ module ParseLine::FixedWidth
|
|
16
16
|
def field(field,range,block=nil)
|
17
17
|
parse_field(field,range,block)
|
18
18
|
end
|
19
|
-
|
20
|
-
def load_lines(filepath)
|
21
|
-
|
22
|
-
|
23
|
-
|
19
|
+
|
20
|
+
def load_lines(filepath, options={})
|
21
|
+
case options[:except].class.to_s
|
22
|
+
when "Array"
|
23
|
+
lines=[]
|
24
|
+
File.open(filepath).each_with_index do |line, i|
|
25
|
+
if options[:length] == nil
|
26
|
+
lines << load_line(line) unless options[:except].include?(i+1)
|
27
|
+
else
|
28
|
+
lines << load_line(line) unless options[:except].include?(i+1) if (line.size - 2) == options[:length].to_i
|
29
|
+
end
|
30
|
+
end
|
31
|
+
lines
|
32
|
+
when "Regexp"
|
33
|
+
lines=[]
|
34
|
+
File.open(filepath).each do |line|
|
35
|
+
if options[:length] == nil
|
36
|
+
lines << load_line(line) if options[:except] !~ line
|
37
|
+
else
|
38
|
+
lines << load_line(line) if options[:except] !~ line if (line.size - 2) == options[:length].to_i
|
39
|
+
end
|
40
|
+
end
|
41
|
+
lines
|
42
|
+
else
|
43
|
+
File.open(filepath).map{|line| load_line(line)}
|
44
|
+
end
|
24
45
|
end
|
46
|
+
|
25
47
|
def load_line(line)
|
26
48
|
this=self.new
|
27
49
|
begin
|
@@ -37,8 +59,5 @@ module ParseLine::FixedWidth
|
|
37
59
|
raise "ParseLine::MalformedLayoutOrLine: '#{line.to_s.strip}', size: #{line.to_s.size}"
|
38
60
|
end
|
39
61
|
end
|
40
|
-
|
41
62
|
end
|
42
|
-
|
43
|
-
end
|
44
|
-
|
63
|
+
end
|
metadata
CHANGED
@@ -1,36 +1,37 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: parseline
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.2
|
4
|
+
version: 1.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Shairon Toledo
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date:
|
12
|
+
date: 2009-05-04 00:00:00 -04:00
|
13
13
|
default_executable:
|
14
14
|
dependencies: []
|
15
15
|
|
16
|
-
description:
|
16
|
+
description: The purpose of gem parseline is to help the developers to load external CSV and fixed width files.
|
17
17
|
email: shairon.toledo@gmail.com
|
18
18
|
executables: []
|
19
19
|
|
20
20
|
extensions: []
|
21
21
|
|
22
|
-
extra_rdoc_files:
|
23
|
-
|
22
|
+
extra_rdoc_files:
|
23
|
+
- README.rdoc
|
24
24
|
files:
|
25
|
-
- lib/parseline
|
25
|
+
- lib/parseline.rb
|
26
26
|
- lib/parseline/parser_line_csv.rb
|
27
27
|
- lib/parseline/parser_line_fixed_width.rb
|
28
|
-
-
|
28
|
+
- README.rdoc
|
29
29
|
has_rdoc: false
|
30
30
|
homepage: http://parseline.rubyforge.org/
|
31
31
|
post_install_message:
|
32
|
-
rdoc_options:
|
33
|
-
|
32
|
+
rdoc_options:
|
33
|
+
- --main
|
34
|
+
- README.rdoc
|
34
35
|
require_paths:
|
35
36
|
- lib
|
36
37
|
required_ruby_version: !ruby/object:Gem::Requirement
|
@@ -51,6 +52,6 @@ rubyforge_project: parseline
|
|
51
52
|
rubygems_version: 1.3.1
|
52
53
|
signing_key:
|
53
54
|
specification_version: 2
|
54
|
-
summary:
|
55
|
+
summary: Load from files to ActiveRecord
|
55
56
|
test_files: []
|
56
57
|
|