fixed_width 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,17 @@
1
#
# Taken from ActiveSupport 2.3.5 lib/active_support/core_ext/symbol.rb
#
# Backport of Symbol#to_proc; it is only defined when the running Ruby does
# not already provide one.
#
unless :to_proc.respond_to?(:to_proc)
  class Symbol
    # Turns the symbol into a simple proc, which is especially useful for enumerations. Examples:
    #
    #   # The same as people.collect { |p| p.name }
    #   people.collect(&:name)
    #
    #   # The same as people.select { |p| p.manager? }.collect { |p| p.salary }
    #   people.select(&:manager?).collect(&:salary)
    def to_proc
      # The first block argument is the receiver; any remaining arguments are
      # forwarded to the method named by this symbol.
      Proc.new { |*args| args.shift.__send__(self, *args) }
    end
  end
end
@@ -0,0 +1,31 @@
1
class FixedWidth
  # Describes one fixed-width file layout: an ordered list of sections, a
  # hash of named templates, and the default options merged into each of them.
  class Definition
    attr_reader :sections, :templates, :options

    # [options] default options merged into every section and template;
    #           +:align+ falls back to +:right+ when not supplied.
    def initialize(options={})
      @sections = []
      @templates = {}
      @options = { :align => :right }.merge(options)
    end

    #
    # [name]    identifier of the section; must be unique in this definition
    # [options] overrides for the definition-wide default options
    # and a block that receives the new section so columns can be declared.
    #
    # returns: the new +Section+, after appending it to +sections+.
    # raises:  +DuplicateSectionNameError+ if +name+ is already taken.
    #
    def section(name, options={}, &block)
      if @sections.any? { |existing| existing.name == name }
        raise DuplicateSectionNameError, "Duplicate section name: '#{name}'"
      end

      new_section = FixedWidth::Section.new(name, @options.merge(options))
      new_section.definition = self
      yield(new_section)
      @sections << new_section
      new_section
    end

    #
    # [name]    key under which the template is stored in +templates+
    # [options] overrides for the definition-wide default options
    # and a block that receives the template section so columns can be declared.
    #
    # returns: the template +Section+.
    #
    def template(name, options={}, &block)
      new_template = FixedWidth::Section.new(name, @options.merge(options))
      # Link the template back to this definition (as #section does) so the
      # block can itself pull in a previously declared template.
      new_template.definition = self
      yield(new_template)
      @templates[name] = new_template
    end

    # Any unknown message is treated as a section declaration named after the
    # called method: <tt>definition.header { ... }</tt> is <tt>section(:header) { ... }</tt>.
    def method_missing(method, *args, &block)
      section(method, *args, &block)
    end
  end
end
@@ -0,0 +1,84 @@
1
#
# =DESCRIPTION:
#
# A simple, clean DSL for describing, writing, and parsing fixed-width text files.
#
# =FEATURES:
#
# * Easy DSL syntax
# * Can parse and format fixed width files
# * Templated sections for reuse
#
# For examples, see examples/*.rb or the README.
#
class FixedWidth
  class ParserError < RuntimeError; end
  class DuplicateColumnNameError < StandardError; end
  class DuplicateGroupNameError < StandardError; end
  class DuplicateSectionNameError < StandardError; end
  class RequiredSectionNotFoundError < StandardError; end
  class RequiredSectionEmptyError < StandardError; end
  class FormattedStringExceedsLengthError < StandardError; end
  class ColumnMismatchError < StandardError; end

  #
  # [name]   a symbol to reference this file definition later
  # [option] a hash of default options for all sub-elements
  # and a block that defines the sections of the file.
  #
  # returns: +Definition+ instance for this file description.
  #
  def self.define(name, options={}) # yields definition
    new_definition = Definition.new(options)
    yield(new_definition)
    definitions[name] = new_definition
    new_definition
  end

  #
  # [definition_name] symbol +name+ used in +define+
  # [data]            nested hash describing the contents of the sections
  #
  # returns: string of the transformed +data+ (into fixed-width records).
  # raises:  +ArgumentError+ when no definition was registered under +definition_name+.
  #
  def self.generate(definition_name, data)
    found = definition(definition_name)
    # Interpolate the requested name; a bare `name` here would resolve to the
    # class name ("FixedWidth") and produce a misleading message.
    raise ArgumentError, "Definition name '#{definition_name}' was not found." unless found
    Generator.new(found).generate(data)
  end

  #
  # [file]            IO object to write the +generate+d data
  # [definition_name] symbol +name+ used in +define+
  # [data]            nested hash describing the contents of the sections
  #
  # writes transformed data to +file+ object as fixed-width records.
  #
  def self.write(file, definition_name, data)
    file.write(generate(definition_name, data))
  end

  #
  # [file]            IO object from which to read the fixed-width text records
  # [definition_name] symbol +name+ used in +define+
  #
  # returns: parsed text records in a nested hash.
  # raises:  +ArgumentError+ when no definition was registered under +definition_name+.
  #
  def self.parse(file, definition_name)
    found = definition(definition_name)
    raise ArgumentError, "Definition name '#{definition_name}' was not found." unless found
    Parser.new(found, file).parse
  end

  # Internal helpers. A bare +private+ does not apply to +def self.+ methods,
  # so these have always been publicly callable; they are left public so
  # existing callers keep working.

  # Registry of every definition created through +define+, keyed by name.
  def self.definitions
    @@definitions ||= {}
  end

  # Looks up a registered definition; returns nil when +name+ is unknown.
  def self.definition(name)
    definitions[name]
  end
end
@@ -0,0 +1,20 @@
1
class FixedWidth
  # Turns a nested data hash into fixed-width text by asking each section of
  # a definition to format the rows stored under that section's name.
  class Generator
    # [definition] object whose +sections+ are walked in order by #generate
    def initialize(definition)
      @definition = definition
    end

    #
    # [data] hash keyed by section name; each value is a single row or an
    #        Array of rows for that section
    #
    # returns: the formatted rows of all sections joined with "\n".
    # raises:  +RequiredSectionEmptyError+ when a non-optional section has
    #          nil or empty content.
    #
    def generate(data)
      @builder = []
      @definition.sections.each do |section|
        content = data[section.name]

        if content.nil? || content.empty?
          unless section.optional
            raise FixedWidth::RequiredSectionEmptyError, "Required section '#{section.name}' was empty."
          end
          # An optional section without data produces no output; formatting
          # a nil row would blow up inside the section.
          next if content.nil?
        end

        rows = content.is_a?(Array) ? content : [content]
        rows.each { |row| @builder << section.format(row) }
      end
      @builder.join("\n")
    end
  end
end
@@ -0,0 +1,47 @@
1
class FixedWidth
  # Reads every line of a file and hands consecutive matching lines to the
  # sections of a definition, in the order the sections were declared.
  class Parser
    # [definition] object whose +sections+ drive the parse
    # [file]       object responding to +readlines+
    def initialize(definition, file)
      @definition = definition
      @file = file
    end

    #
    # returns: hash keyed by section name; a singular section maps to one
    #          parsed row, any other section to an Array of parsed rows.
    #          An input without any lines yields an empty hash.
    # raises:  +RequiredSectionNotFoundError+ when a non-optional section
    #          matched no line.
    #
    def parse
      @parsed = {}
      @content = read_file
      return @parsed if @content.empty?

      @definition.sections.each do |section|
        matched = fill_content(section)
        next if matched > 0 || section.optional
        raise FixedWidth::RequiredSectionNotFoundError, "Required section '#{section.name}' was not found."
      end
      @parsed
    end

    private

    # All lines of the file with their trailing line terminator removed.
    def read_file
      @file.readlines.map { |line| line.chomp }
    end

    # Consumes lines from the front of the remaining content for as long as
    # +section+ matches them; returns how many lines were consumed.
    def fill_content(section)
      matches = 0
      # @content.first is nil once the content is exhausted; the section's
      # match is relied on to reject nil and end the loop.
      while section.match(@content.first)
        add_to_section(section, @content.shift)
        matches += 1
      end
      matches
    end

    # Stores the parsed +line+ under the section's name: singular sections
    # keep only the latest row, other sections accumulate rows in an Array.
    def add_to_section(section, line)
      if section.singular
        @parsed[section.name] = section.parse(line)
      else
        @parsed[section.name] ||= []
        @parsed[section.name] << section.parse(line)
      end
    end
  end
end
@@ -0,0 +1,91 @@
1
class FixedWidth
  # One record type of a fixed-width file: an ordered list of columns plus a
  # "trap" callable that decides whether a raw line belongs to this section.
  class Section
    attr_accessor :definition, :optional, :singular
    attr_reader :name, :columns, :options

    # [name]    identifier of the section
    # [options] section options; +:trap+, +:optional+ and +:singular+ are
    #           read here, and the whole hash is merged into every column's options
    def initialize(name, options={})
      @name = name
      @options = options
      @columns = []
      @trap = options[:trap]
      @optional = options[:optional] || false
      @singular = options[:singular] || false
    end

    #
    # [name]    column name, unique within its group
    # [length]  passed through to +Column.new+
    # [options] column options merged over the section options; +:group+
    #           places the column in a named sub-hash
    #
    # returns: the new +Column+.
    # raises:  +DuplicateColumnNameError+ / +DuplicateGroupNameError+ when the
    #          name clashes with an existing column or group.
    #
    def column(name, length, options={})
      if column_names_by_group(options[:group]).include?(name)
        raise FixedWidth::DuplicateColumnNameError, "You have already defined a column named '#{name}' in the '#{options[:group].inspect}' group."
      end
      if column_names_by_group(nil).include?(options[:group])
        raise FixedWidth::DuplicateGroupNameError, "You have already defined a column named '#{options[:group]}'; you cannot have a group and column of the same name."
      end
      if group_names.include?(name)
        raise FixedWidth::DuplicateGroupNameError, "You have already defined a group named '#{name}'; you cannot have a group and column of the same name."
      end

      col = Column.new(name, length, @options.merge(options))
      @columns << col
      # The cached unpack format no longer covers every column.
      @unpacker = nil
      col
    end

    # Adds a column named +:spacer+ of the given length; +spacer+, when
    # given, is used as that column's +:padding+ option.
    def spacer(length, spacer=nil)
      options = {}
      options[:padding] = spacer if spacer
      column(:spacer, length, options)
    end

    # Stores the block that #match calls to recognise lines of this section.
    def trap(&block)
      @trap = block
    end

    #
    # [name] key of a template registered on this section's definition
    #
    # Appends the template's columns and merges in its options.
    # raises: +ArgumentError+ when the template is unknown.
    #
    def template(name)
      template = @definition.templates[name]
      raise ArgumentError, "Template '#{name}' not found as a known template." unless template
      @columns += template.columns
      # The cached unpack format no longer covers every column.
      @unpacker = nil
      # Section options should trump template options
      @options = template.options.merge(@options)
    end

    # Formats one row: each column formats the value stored under its name
    # (looked up inside +data[group]+ for grouped columns) and the pieces are
    # concatenated in column order.
    def format(data)
      @columns.map do |c|
        hash = c.group ? data[c.group] : data
        c.format(hash[c.name])
      end.join
    end

    # Splits +line+ with the combined unpack format of all columns and
    # returns a hash of parsed values; grouped columns land in a nested hash
    # under the group name and +:spacer+ columns are dropped.
    def parse(line)
      line_data = line.unpack(unpacker)
      row = group_names.inject({}) { |h, g| h[g] = {}; h }

      @columns.each_with_index do |c, i|
        next if c.name == :spacer
        assignee = c.group ? row[c.group] : row
        assignee[c.name] = c.parse(line_data[i])
      end

      row
    end

    # False for a nil line; otherwise whatever the trap returns for it.
    def match(raw_line)
      raw_line.nil? ? false : @trap.call(raw_line)
    end

    # Any unknown message declares a column named after the called method:
    # <tt>section.first_name 10</tt> is <tt>section.column(:first_name, 10)</tt>.
    def method_missing(method, *args)
      column(method, *args)
    end

    private

    # Names of the columns in +group+ (nil selects ungrouped columns),
    # ignoring spacers, which may repeat.
    def column_names_by_group(group)
      @columns.select { |c| c.group == group }.map { |c| c.name } - [:spacer]
    end

    # Distinct group names used by the columns declared so far.
    def group_names
      @columns.map { |c| c.group }.compact.uniq
    end

    # Concatenated String#unpack format of all columns, cached until the
    # column list changes.
    def unpacker
      @unpacker ||= @columns.map { |c| c.unpacker }.join
    end
  end
end
@@ -0,0 +1,9 @@
1
+ $: << File.dirname(__FILE__)
2
+ require 'ostruct'
3
+ require 'fixed_width/core_ext/symbol'
4
+ require 'fixed_width/fixed_width'
5
+ require 'fixed_width/definition'
6
+ require 'fixed_width/section'
7
+ require 'fixed_width/column'
8
+ require 'fixed_width/parser'
9
+ require 'fixed_width/generator'
@@ -0,0 +1,209 @@
1
+ require File.join(File.dirname(__FILE__), 'spec_helper')
2
+
3
# Specs for FixedWidth::Column: construction defaults, option handling,
# parsing raw field text, and formatting values to the column width.
#
# NOTE(review): the expected strings in the formatting examples are padded to
# the full column length, consistent with the zero-padded expectations in
# this same spec ('00025', '0000234.40') — confirm against Column#format.
describe FixedWidth::Column do
  before(:each) do
    @name = :id
    @length = 5
    @column = FixedWidth::Column.new(@name, @length)
  end

  describe "when being created" do
    it "should have a name" do
      @column.name.should == @name
    end

    it "should have a length" do
      @column.length.should == @length
    end

    it "should have a default padding" do
      @column.padding.should == ' '
    end

    it "should have a default alignment" do
      @column.alignment.should == :right
    end

    it "should have a default truncation" do
      @column.truncate.should be_false
    end

    it "should build the proper unpack value for a string" do
      @column.send(:unpacker).should == 'A5'
    end
  end

  describe "when specifying an alignment" do
    before(:each) do
      @column = FixedWidth::Column.new(@name, @length, :align => :left)
    end

    it "should only accept :right or :left for an alignment" do
      lambda{ FixedWidth::Column.new(@name, @length, :align => :bogus) }.should raise_error(ArgumentError, "Option :align only accepts :right (default) or :left")
    end

    it "should override the default alignment" do
      @column.alignment.should == :left
    end
  end

  describe "when specifying padding" do
    before(:each) do
      @column = FixedWidth::Column.new(@name, @length, :padding => '0')
    end

    it "should override the default padding" do
      @column.padding.should == '0'
    end
  end

  describe "when parsing a value from a file" do
    it "should return nil for blank fields if specified" do
      @column = FixedWidth::Column.new(@name, @length, :padding => '0', :nil_blank => true)
      @column.parse(' name ').should == 'name '
      @column.parse(" \t \n").should == nil
      @column.parse(" 0 \n").should == "0 \n"
      @column.parse('').should == nil
    end

    it "should default to returning formatted strings if nil_blank is not set" do
      @column = FixedWidth::Column.new(@name, @length, :padding => '0', :nil_blank => false)
      @column.parse(' name ').should == 'name '
      @column.parse(" \t \n").should == ""
      @column.parse(" 0 \n").should == "0 \n"
      @column.parse('').should == ""
    end

    it "should default to a right-aligned string" do
      @column.parse(' name ').should == 'name '
      @column.parse(" \t 234").should == '234'
      @column.parse(" 000000234 \n").should == "000000234 \n"
      @column.parse('12.34').should == '12.34'
    end

    it "should default to a left-aligned string if no parser specified, but alignment is left" do
      @column = FixedWidth::Column.new(@name, @length, :align => :left)

      @column.parse(' name ').should == ' name'
      @column.parse(" \t 234").should == " \t 234"
      @column.parse(" 000000234 \n").should == " 000000234"
      @column.parse('12.34').should == '12.34'
    end

    it "should support a symbol as the parser (:to_i)" do
      @column = FixedWidth::Column.new(:amount, 10, :parser => :to_i)
      @column.parse('234 ').should == 234
      @column.parse(' 234').should == 234
      @column.parse('00000234').should == 234
      @column.parse('Ryan ').should == 0
      @column.parse('00023.45').should == 23
    end

    it "should support a symbol as the parser (:to_f)" do
      @column = FixedWidth::Column.new(:amount, 10, :parser => :to_f)
      @column.parse(' 234.45').should == 234.45
      @column.parse('234.5600').should == 234.56
      @column.parse(' 234').should == 234.0
      @column.parse('00000234').should == 234.0
      @column.parse('Ryan ').should == 0
      @column.parse('00023.45').should == 23.45
    end

    it "should support a lambda as the parser (date)" do
      @column = FixedWidth::Column.new(:date, 10, :parser => lambda{|x| Date.strptime(x, "%m%d%Y")})
      dt = @column.parse('08222009')
      dt.should be_a(Date)
      dt.to_s.should == '2009-08-22'
    end
  end

  describe "when applying formatting options" do
    it "should respect a right alignment" do
      @column = FixedWidth::Column.new(@name, @length, :align => :right)
      @column.format(25).should == '   25'
    end

    it "should respect a left alignment" do
      @column = FixedWidth::Column.new(@name, @length, :align => :left)
      @column.format(25).should == '25   '
    end

    it "should respect padding with spaces" do
      @column = FixedWidth::Column.new(@name, @length, :padding => ' ')
      @column.format(25).should == '   25'
    end

    it "should respect padding with zeros" do
      @column = FixedWidth::Column.new(@name, @length, :padding => '0')
      @column.format(25).should == '00025'
    end

    it "should work with non-string inputs and the default formatter" do
      @column = FixedWidth::Column.new(@name, @length)
      @column.format(25).should == '   25'
    end
  end

  describe "when formatting values for a file" do
    it "should default to a string" do
      @column = FixedWidth::Column.new(:name, 10)
      @column.format('Bill').should == '      Bill'
    end

    describe "whose size is too long" do
      it "should raise an error if truncate is false" do
        @value = "XX" * @length
        lambda { @column.format(@value) }.should raise_error(
          FixedWidth::FormattedStringExceedsLengthError,
          "The formatted value '#{@value}' in column '#{@name}' exceeds the allowed length of #{@length} chararacters."
        )
      end

      it "should truncate from the left if truncate is true and aligned left" do
        @column = FixedWidth::Column.new(@name, @length, :truncate => true, :align => :left)
        @column.format("This is too long").should == "This "
      end

      it "should truncate from the right if truncate is true and aligned right" do
        @column = FixedWidth::Column.new(@name, @length, :truncate => true, :align => :right)
        @column.format("This is too long").should == " long"
      end
    end

    it "should support a symbol formatter (:to_s)" do
      @column = FixedWidth::Column.new(:amount, 10, :formatter => :to_s)
      @column.format(234).should == '       234'
      @column.format('234').should == '       234'
    end

    it "should support a lambda formatter (.to_f.to_s)" do
      @column = FixedWidth::Column.new(:amount, 10, :formatter => lambda {|x| x.to_f.to_s })
      @column.format(234.45).should == '    234.45'
      @column.format('234.4500').should == '    234.45'
      @column.format('3').should == '       3.0'
    end

    it "should support a lambda formatter (float with sprintf)" do
      @column = FixedWidth::Column.new(:amount, 10, :formatter => lambda {|x| "%.3f" % x.to_f })
      @column.format(234.45).should == '   234.450'
      @column.format('234.4500').should == '   234.450'
      @column.format('3').should == '     3.000'
    end

    it "should support the float type with a format, alignment and padding" do
      @column = FixedWidth::Column.new(:amount, 10, :formatter => lambda {|x| "%.2f" % x.to_f }, :align => :left, :padding => '0')
      @column.format(234.45).should == '234.450000'
      @column = FixedWidth::Column.new(:amount, 10, :formatter => lambda {|x| "%.2f" % x.to_f }, :align => :right, :padding => '0')
      @column.format('234.400').should == '0000234.40'
      @column = FixedWidth::Column.new(:amount, 10, :formatter => lambda {|x| "%.4f" % x.to_f }, :align => :left, :padding => ' ')
      @column.format('3').should == '3.0000    '
    end

    it "should support the date type with a :format" do
      dt = Date.new(2009, 8, 22)
      @column = FixedWidth::Column.new(:date, 8, :formatter => lambda {|x| x.strftime("%m%d%Y") } )
      @column.format(dt).should == '08222009'
    end
  end

end