parseline 1.0.2 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +140 -0
- data/lib/parseline.rb +106 -0
- data/lib/parseline/parser_line_csv.rb +1 -0
- data/lib/parseline/parser_line_fixed_width.rb +31 -12
- metadata +12 -11
data/README.rdoc
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
The purpose of the parseline gem is to help developers load external CSV and fixed-width files.
|
|
2
|
+
|
|
3
|
+
===Installation
|
|
4
|
+
sudo gem sources --add http://gems.github.com
|
|
5
|
+
sudo gem install shairontoledo-parseline
|
|
6
|
+
or from rubyforge
|
|
7
|
+
sudo gem install parseline
|
|
8
|
+
|
|
9
|
+
===How to Use
|
|
10
|
+
A single data mapper
|
|
11
|
+
|
|
12
|
+
===With ActiveRecord
|
|
13
|
+
|
|
14
|
+
====Using CSV
|
|
15
|
+
You can load files whose fields are delimited by a character or by a regexp. To demonstrate this, we'll use the following migration/table:
|
|
16
|
+
|
|
17
|
+
class CreateProducts < ActiveRecord::Migration
|
|
18
|
+
def self.up
|
|
19
|
+
create_table :products do |t|
|
|
20
|
+
t.integer :code
|
|
21
|
+
t.string :name
|
|
22
|
+
t.boolean :in_stock
|
|
23
|
+
t.date :date
|
|
24
|
+
t.float :price
|
|
25
|
+
end
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
def self.down
|
|
29
|
+
drop_table :products
|
|
30
|
+
end
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
and a given file called 'data.csv' with the content below.
|
|
34
|
+
|
|
35
|
+
1;PRODUT 1;Y;;11/21/2008;90.00
|
|
36
|
+
2;PRODUT 2;N;;11/22/2008;341.33
|
|
37
|
+
3;PRODUT 3;N;;11/01/2008;1.99
|
|
38
|
+
4;PRODUT 4;Y;;11/15/2008;34.98
|
|
39
|
+
5;PRODUT 5;N;;11/14/2008;130.44
|
|
40
|
+
6;PRODUT 6;Y;;11/05/2008;20.11
|
|
41
|
+
|
|
42
|
+
The descriptions of data layout are
|
|
43
|
+
|
|
44
|
+
product's code;name;if it is in stock;*reserved for future use*;date;price
|
|
45
|
+
|
|
46
|
+
As you can see, we need to convert the 'in stock' column to a boolean and convert the date from MM/DD/YYYY to YYYY-MM-DD.
|
|
47
|
+
Each parse.field call accepts an optional lambda that receives the raw field value and returns the formatted value.
|
|
48
|
+
|
|
49
|
+
so, take a look at the ActiveRecord definition using the module ParseLine::CSV
|
|
50
|
+
require 'parseline'
|
|
51
|
+
|
|
52
|
+
class Product < ActiveRecord::Base
|
|
53
|
+
extend ParseLine::CSV
|
|
54
|
+
csv_layout :delimiter => ";" do |parse|
|
|
55
|
+
parse.field :code
|
|
56
|
+
parse.field :name
|
|
57
|
+
parse.field :in_stock, lambda {|s| s == 'Y' }
|
|
58
|
+
parse.ignore_field
|
|
59
|
+
parse.field :date , lambda {|d| d.gsub(/(\d{2})\/(\d{2})\/(\d{4})/,'\3-\1-\2') }
|
|
60
|
+
parse.field :price
|
|
61
|
+
end
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
Each column is mapped to a field, except the reserved column after :in_stock, which is skipped with parse.ignore_field.
|
|
65
|
+
|
|
66
|
+
===== Loading external file
|
|
67
|
+
There are two ways to load data. To load a single line, use the method Product.load_line, which returns an instance of Product:
|
|
68
|
+
|
|
69
|
+
data_file=File.readlines("data.csv")
|
|
70
|
+
@product=Product.load_line data_file.first
|
|
71
|
+
@product.save #data will go to database
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
Alternatively, load every record from a file with Product.load_lines, which returns an array of Products:
|
|
75
|
+
|
|
76
|
+
@products=Product.load_lines "data.csv"
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
====Using Fixed Width
|
|
80
|
+
To load fixed-width data, extend your class with the module ParseLine::FixedWidth. It provides the same class methods for loading data, load_line and load_lines. Consider the following fixed-width file:
|
|
81
|
+
|
|
82
|
+
000001PRODUT 1 Y 11/21/2008000090.00
|
|
83
|
+
000002PRODUT 2 N 11/22/2008000341.33
|
|
84
|
+
000003PRODUT 3 N 11/01/2008000001.99
|
|
85
|
+
000004PRODUT 4 Y 11/15/2008000034.98
|
|
86
|
+
000005PRODUT 5 N 11/14/2008000130.44
|
|
87
|
+
000006PRODUT 6 Y 11/05/2008000020.11
|
|
88
|
+
|
|
89
|
+
Let's split those lines into fields by defining a model like this:
|
|
90
|
+
require 'parseline'
|
|
91
|
+
|
|
92
|
+
class Product < ActiveRecord::Base
|
|
93
|
+
extend ParseLine::FixedWidth
|
|
94
|
+
fixed_width_layout do |parse|
|
|
95
|
+
parse.field :code , 0..5
|
|
96
|
+
parse.field :name, 6..21
|
|
97
|
+
parse.field :in_stock, 22..22, lambda {|s| s == 'Y' }
|
|
98
|
+
parse.field :date , 24..33, lambda {|d| d.gsub(/(\d{2})\/(\d{2})\/(\d{4})/,'\3-\1-\2') }
|
|
99
|
+
parse.field :price, 34..42
|
|
100
|
+
end
|
|
101
|
+
end
|
|
102
|
+
|
|
103
|
+
After that
|
|
104
|
+
|
|
105
|
+
@products=Product.load_lines "data.txt"
|
|
106
|
+
|
|
107
|
+
===With whatever class
|
|
108
|
+
|
|
109
|
+
====Using Fixed Width
|
|
110
|
+
|
|
111
|
+
require 'rubygems'
|
|
112
|
+
require 'parseline'
|
|
113
|
+
|
|
114
|
+
class Person
|
|
115
|
+
|
|
116
|
+
attr_accessor :id
|
|
117
|
+
attr_accessor :age
|
|
118
|
+
|
|
119
|
+
extend ParseLine::FixedWidth
|
|
120
|
+
|
|
121
|
+
fixed_width_layout do |parse|
|
|
122
|
+
parse.field :id, 0..4
|
|
123
|
+
parse.field :age, 5..6
|
|
124
|
+
end
|
|
125
|
+
end
|
|
126
|
+
|
|
127
|
+
After that
|
|
128
|
+
|
|
129
|
+
@people=Person.load_lines "data.txt"
|
|
130
|
+
|
|
131
|
+
and
|
|
132
|
+
|
|
133
|
+
@people.each{|person| p "id=#{person.id} and age=#{person.age}" }
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
Remember to extend your class with the appropriate parse module (ParseLine::CSV or ParseLine::FixedWidth).
|
|
137
|
+
It's easy to do, so enjoy and make money with it!
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
|
data/lib/parseline.rb
CHANGED
|
@@ -1,5 +1,111 @@
|
|
|
1
1
|
#shairon.toledo@gmail.com
|
|
2
2
|
#http://www.hashcode.eti.br
|
|
3
|
+
#
|
|
4
|
+
#The purpose of the parseline gem is to help developers load external CSV and fixed-width files.
|
|
5
|
+
#
|
|
6
|
+
#===Installation
|
|
7
|
+
# sudo gem sources --add http://gems.github.com
|
|
8
|
+
# sudo gem install parseline
|
|
9
|
+
#
|
|
10
|
+
#
|
|
11
|
+
#===How to Use
|
|
12
|
+
#As a single data mapper
|
|
13
|
+
#
|
|
14
|
+
#====Using CSV
|
|
15
|
+
#You can load files whose fields are delimited by a character or by a regexp. To demonstrate this, we'll use the following migration/table:
|
|
16
|
+
#
|
|
17
|
+
#class CreateProducts < ActiveRecord::Migration
|
|
18
|
+
# def self.up
|
|
19
|
+
# create_table :products do |t|
|
|
20
|
+
# t.integer :code
|
|
21
|
+
# t.string :name
|
|
22
|
+
# t.boolean :in_stock
|
|
23
|
+
# t.date :date
|
|
24
|
+
# t.float :price
|
|
25
|
+
# end
|
|
26
|
+
# end
|
|
27
|
+
#
|
|
28
|
+
# def self.down
|
|
29
|
+
# drop_table :products
|
|
30
|
+
# end
|
|
31
|
+
#end
|
|
32
|
+
#
|
|
33
|
+
#and a given file called 'data.csv' with the content below.
|
|
34
|
+
#
|
|
35
|
+
# 1;PRODUT 1;Y;;11/21/2008;90.00
|
|
36
|
+
# 2;PRODUT 2;N;;11/22/2008;341.33
|
|
37
|
+
# 3;PRODUT 3;N;;11/01/2008;1.99
|
|
38
|
+
# 4;PRODUT 4;Y;;11/15/2008;34.98
|
|
39
|
+
# 5;PRODUT 5;N;;11/14/2008;130.44
|
|
40
|
+
# 6;PRODUT 6;Y;;11/05/2008;20.11
|
|
41
|
+
#
|
|
42
|
+
#The descriptions of data layout are
|
|
43
|
+
#
|
|
44
|
+
# product's code;name;if it is in stock;*reserved for future use*;date;price
|
|
45
|
+
#
|
|
46
|
+
#As you can see, we need to convert the 'in stock' column to a boolean and convert the date from MM/DD/YYYY to YYYY-MM-DD.
|
|
47
|
+
#Each parse.field call accepts an optional lambda that receives the raw field value and returns the formatted value.
|
|
48
|
+
#
|
|
49
|
+
#so, take a look at the ActiveRecord definition using the module ParseLine::CSV
|
|
50
|
+
# require 'parseline'
|
|
51
|
+
#
|
|
52
|
+
# class Product < ActiveRecord::Base
|
|
53
|
+
# extend ParseLine::CSV
|
|
54
|
+
# csv_layout :delimiter => ";" do |parse|
|
|
55
|
+
# parse.field :code
|
|
56
|
+
# parse.field :name
|
|
57
|
+
# parse.field :in_stock, lambda {|s| s == 'Y' }
|
|
58
|
+
# parse.ignore_field
|
|
59
|
+
# parse.field :date , lambda {|d| d.gsub(/(\d{2})\/(\d{2})\/(\d{4})/,'\3-\1-\2') }
|
|
60
|
+
# parse.field :price
|
|
61
|
+
# end
|
|
62
|
+
# end
|
|
63
|
+
#
|
|
64
|
+
#Each column is mapped to a field, except the reserved column after :in_stock, which is skipped with parse.ignore_field.
|
|
65
|
+
#
|
|
66
|
+
#===== Loading external file
|
|
67
|
+
#There are two ways to load data. To load a single line, use the method Product.load_line, which returns an instance of Product:
|
|
68
|
+
#
|
|
69
|
+
# data_file=File.readlines("data.csv")
|
|
70
|
+
# @product=Product.load_line data_file.first
|
|
71
|
+
# @product.save #data will go to database
|
|
72
|
+
#
|
|
73
|
+
#
|
|
74
|
+
#Alternatively, load every record from a file with Product.load_lines, which returns an array of Products:
|
|
75
|
+
#
|
|
76
|
+
# @products=Product.load_lines "data.csv"
|
|
77
|
+
#
|
|
78
|
+
#
|
|
79
|
+
#====Using Fixed Width
|
|
80
|
+
#To load fixed-width data, extend your class with the module ParseLine::FixedWidth. It provides the same class methods for loading data, load_line and load_lines. Consider the following fixed-width file:
|
|
81
|
+
#
|
|
82
|
+
# 000001PRODUT 1 Y 11/21/2008000090.00
|
|
83
|
+
# 000002PRODUT 2 N 11/22/2008000341.33
|
|
84
|
+
# 000003PRODUT 3 N 11/01/2008000001.99
|
|
85
|
+
# 000004PRODUT 4 Y 11/15/2008000034.98
|
|
86
|
+
# 000005PRODUT 5 N 11/14/2008000130.44
|
|
87
|
+
# 000006PRODUT 6 Y 11/05/2008000020.11
|
|
88
|
+
#
|
|
89
|
+
#Let's split those lines into fields by defining a model like this:
|
|
90
|
+
# require 'parseline'
|
|
91
|
+
#
|
|
92
|
+
# class Product < ActiveRecord::Base
|
|
93
|
+
# extend ParseLine::FixedWidth
|
|
94
|
+
# fixed_width_layout do |parse|
|
|
95
|
+
# parse.field :code , 0..5
|
|
96
|
+
# parse.field :name, 6..21
|
|
97
|
+
# parse.field :in_stock, 22..22, lambda {|s| s == 'Y' }
|
|
98
|
+
# parse.field :date , 24..33, lambda {|d| d.gsub(/(\d{2})\/(\d{2})\/(\d{4})/,'\3-\1-\2') }
|
|
99
|
+
# parse.field :price, 34..42
|
|
100
|
+
# end
|
|
101
|
+
# end
|
|
102
|
+
#
|
|
103
|
+
#After that
|
|
104
|
+
#
|
|
105
|
+
# @products=Product.load_lines "data.txt"
|
|
106
|
+
#
|
|
107
|
+
#Remember to extend your class with the appropriate parse module (ParseLine::CSV or ParseLine::FixedWidth).
|
|
108
|
+
#It's easy to do, so enjoy and make money with it!
|
|
3
109
|
module ParseLine
|
|
4
110
|
require 'parseline/parser_line_csv'
|
|
5
111
|
require 'parseline/parser_line_fixed_width'
|
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
#shairon.toledo@gmail.com
|
|
2
2
|
#http://www.hashcode.eti.br
|
|
3
3
|
module ParseLine::FixedWidth
|
|
4
|
-
|
|
4
|
+
|
|
5
5
|
self.class_eval do
|
|
6
|
-
|
|
6
|
+
|
|
7
7
|
def fixed_width_layout(&block)
|
|
8
8
|
yield self
|
|
9
9
|
end
|
|
10
|
-
|
|
10
|
+
|
|
11
11
|
def parse_field(field,range,block=nil)
|
|
12
12
|
self.send(:class_variable_set,:@@parse_values,[]) unless self.class_variable_defined?(:@@parse_values)
|
|
13
13
|
self.send(:class_variable_get,:@@parse_values) << [field,range,block]
|
|
@@ -16,12 +16,34 @@ module ParseLine::FixedWidth
|
|
|
16
16
|
def field(field,range,block=nil)
|
|
17
17
|
parse_field(field,range,block)
|
|
18
18
|
end
|
|
19
|
-
|
|
20
|
-
def load_lines(filepath)
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
19
|
+
|
|
20
|
+
def load_lines(filepath, options={})
|
|
21
|
+
case options[:except].class.to_s
|
|
22
|
+
when "Array"
|
|
23
|
+
lines=[]
|
|
24
|
+
File.open(filepath).each_with_index do |line, i|
|
|
25
|
+
if options[:length] == nil
|
|
26
|
+
lines << load_line(line) unless options[:except].include?(i+1)
|
|
27
|
+
else
|
|
28
|
+
lines << load_line(line) unless options[:except].include?(i+1) if (line.size - 2) == options[:length].to_i
|
|
29
|
+
end
|
|
30
|
+
end
|
|
31
|
+
lines
|
|
32
|
+
when "Regexp"
|
|
33
|
+
lines=[]
|
|
34
|
+
File.open(filepath).each do |line|
|
|
35
|
+
if options[:length] == nil
|
|
36
|
+
lines << load_line(line) if options[:except] !~ line
|
|
37
|
+
else
|
|
38
|
+
lines << load_line(line) if options[:except] !~ line if (line.size - 2) == options[:length].to_i
|
|
39
|
+
end
|
|
40
|
+
end
|
|
41
|
+
lines
|
|
42
|
+
else
|
|
43
|
+
File.open(filepath).map{|line| load_line(line)}
|
|
44
|
+
end
|
|
24
45
|
end
|
|
46
|
+
|
|
25
47
|
def load_line(line)
|
|
26
48
|
this=self.new
|
|
27
49
|
begin
|
|
@@ -37,8 +59,5 @@ module ParseLine::FixedWidth
|
|
|
37
59
|
raise "ParseLine::MalformedLayoutOrLine: '#{line.to_s.strip}', size: #{line.to_s.size}"
|
|
38
60
|
end
|
|
39
61
|
end
|
|
40
|
-
|
|
41
62
|
end
|
|
42
|
-
|
|
43
|
-
end
|
|
44
|
-
|
|
63
|
+
end
|
metadata
CHANGED
|
@@ -1,36 +1,37 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: parseline
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.0.2
|
|
4
|
+
version: 1.0.3
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Shairon Toledo
|
|
8
|
-
autorequire:
|
|
8
|
+
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
11
|
|
|
12
|
-
date:
|
|
12
|
+
date: 2009-05-04 00:00:00 -04:00
|
|
13
13
|
default_executable:
|
|
14
14
|
dependencies: []
|
|
15
15
|
|
|
16
|
-
description:
|
|
16
|
+
description: The purpose of gem parseline is to help the developers to load external CSV and fixed width files.
|
|
17
17
|
email: shairon.toledo@gmail.com
|
|
18
18
|
executables: []
|
|
19
19
|
|
|
20
20
|
extensions: []
|
|
21
21
|
|
|
22
|
-
extra_rdoc_files:
|
|
23
|
-
|
|
22
|
+
extra_rdoc_files:
|
|
23
|
+
- README.rdoc
|
|
24
24
|
files:
|
|
25
|
-
- lib/parseline
|
|
25
|
+
- lib/parseline.rb
|
|
26
26
|
- lib/parseline/parser_line_csv.rb
|
|
27
27
|
- lib/parseline/parser_line_fixed_width.rb
|
|
28
|
-
-
|
|
28
|
+
- README.rdoc
|
|
29
29
|
has_rdoc: false
|
|
30
30
|
homepage: http://parseline.rubyforge.org/
|
|
31
31
|
post_install_message:
|
|
32
|
-
rdoc_options:
|
|
33
|
-
|
|
32
|
+
rdoc_options:
|
|
33
|
+
- --main
|
|
34
|
+
- README.rdoc
|
|
34
35
|
require_paths:
|
|
35
36
|
- lib
|
|
36
37
|
required_ruby_version: !ruby/object:Gem::Requirement
|
|
@@ -51,6 +52,6 @@ rubyforge_project: parseline
|
|
|
51
52
|
rubygems_version: 1.3.1
|
|
52
53
|
signing_key:
|
|
53
54
|
specification_version: 2
|
|
54
|
-
summary:
|
|
55
|
+
summary: Load from files to ActiveRecord
|
|
55
56
|
test_files: []
|
|
56
57
|
|