fixed_width-multibyte 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,14 @@
1
+ $: << File.dirname(__FILE__)
2
+ require 'ostruct'
3
+
4
+ require 'active_support'
5
+ require 'active_support/version'
6
+ require 'active_support/multibyte' if ::ActiveSupport::VERSION::MAJOR >= 3
7
+
8
+ require 'fixed_width/core_ext/symbol'
9
+ require 'fixed_width/fixed_width'
10
+ require 'fixed_width/definition'
11
+ require 'fixed_width/section'
12
+ require 'fixed_width/column'
13
+ require 'fixed_width/parser'
14
+ require 'fixed_width/generator'
@@ -0,0 +1,86 @@
1
class FixedWidth
  # A single fixed-width field of a section.
  #
  # A column knows its name, its width (+length+) and how to convert a value
  # in both directions:
  #
  # * +parse+  turns the raw slice read from a line into a Ruby value.
  # * +format+ turns a Ruby value into a string exactly +length+ characters
  #   wide (padded, and optionally truncated).
  #
  # Recognised options: +:align+ (+:right+ or +:left+), +:padding+,
  # +:truncate+, +:group+, +:parser+, +:formatter+ and +:nil_blank+.
  # Unknown options are ignored.
  class Column
    DEFAULT_PADDING = ' '
    DEFAULT_ALIGNMENT = :right
    DEFAULT_TRUNCATE = false
    DEFAULT_FORMATTER = :to_s

    # Matches only when the WHOLE value is empty or whitespace.
    # \A and \z anchor to the string; the previous ^ and $ anchored to any
    # line, so a value that merely contained an empty line (e.g. "\nab")
    # was wrongly reported as blank.
    BLANK_REGEX = /\A\s*\z/

    attr_reader :name, :length, :alignment, :padding, :truncate, :group

    # [name]    identifier of the column (key in the parsed/formatted hash)
    # [length]  width of the column in characters
    # [options] see the class description
    #
    # Raises ArgumentError when +:align+ is neither +:left+ nor +:right+.
    def initialize(name, length, options={})
      assert_valid_options(options)
      @name = name
      @length = length
      @alignment = options[:align] || DEFAULT_ALIGNMENT
      @padding = options[:padding] || DEFAULT_PADDING
      @truncate = options[:truncate] || DEFAULT_TRUNCATE

      @group = options[:group]

      # Parser and formatter may be given as a Symbol (method name sent to
      # the value) or as anything responding to +call+.
      @parser = options[:parser]
      @parser = @parser.to_proc if @parser.is_a?(Symbol)

      @formatter = options[:formatter]
      @formatter ||= DEFAULT_FORMATTER
      @formatter = @formatter.to_proc if @formatter.is_a?(Symbol)

      @nil_blank = options[:nil_blank]
    end

    # Converts the raw text of this column into a value.
    #
    # * With +:nil_blank+ set, a blank value yields +nil+.
    # * Otherwise a custom parser, when present, receives the raw value.
    # * Otherwise the padding side is stripped: leading whitespace for
    #   right-aligned columns, trailing whitespace for left-aligned ones.
    #
    # Any error raised while parsing is re-raised as ParserError.
    def parse(value)
      if @nil_blank && blank?(value)
        nil
      elsif @parser
        @parser.call(value)
      else
        case @alignment
        when :right
          value.lstrip
        when :left
          value.rstrip
        end
      end
    rescue
      raise ParserError.new("The value '#{value}' could not be parsed: #{$!}")
    end

    # Runs the formatter on +value+, enforces the column width and pads the
    # result to exactly +length+ characters.
    #
    # Raises FixedWidth::FormattedStringExceedsLengthError when the formatted
    # value is too long and +:truncate+ is not enabled.
    def format(value)
      pad(
        validate_size(
          @formatter.call(value)
        )
      )
    end

    private

    # True (a match offset) when +value+ is empty or whitespace only.
    def blank?(value)
      value =~ BLANK_REGEX
    end

    # Pads on the side opposite to the alignment.
    def pad(value)
      case @alignment
      when :left
        value.ljust(@length, @padding)
      when :right
        value.rjust(@length, @padding)
      end
    end

    def assert_valid_options(options)
      unless options[:align].nil? || [:left, :right].include?(options[:align])
        raise ArgumentError.new("Option :align only accepts :right (default) or :left")
      end
    end

    # Returns +result+ unchanged when it fits. When it does not fit, either
    # raises or (with +:truncate+) keeps the characters nearest the aligned
    # edge: the tail for right-aligned columns, the head for left-aligned.
    def validate_size(result)
      return result if result.length <= @length
      raise FixedWidth::FormattedStringExceedsLengthError.new(
        "The formatted value '#{result}' in column '#{@name}' exceeds the allowed length of #{@length} chararacters.") unless @truncate
      case @alignment
      when :right then result[-@length,@length]
      when :left then result[0,@length]
      end
    end
  end
end
@@ -0,0 +1,17 @@
1
+ #
2
+ # Taken from ActiveSupport 2.3.5 lib/active_support/core_ext/symbol.rb
3
+ #
4
# Backport for Rubies whose Symbol has no #to_proc; a no-op everywhere else.
unless Symbol.method_defined?(:to_proc)
  class Symbol
    # Builds a proc that sends this symbol, as a method name, to its first
    # argument and forwards the remaining arguments. Handy with enumerations:
    #
    #   # The same as people.collect { |p| p.name }
    #   people.collect(&:name)
    #
    #   # The same as people.select { |p| p.manager? }.collect { |p| p.salary }
    #   people.select(&:manager?).collect(&:salary)
    def to_proc
      Proc.new do |*args|
        receiver = args.shift
        receiver.__send__(self, *args)
      end
    end
  end
end
@@ -0,0 +1,31 @@
1
class FixedWidth
  # Describes one file layout: an ordered list of sections plus a set of
  # named templates, together with the default options handed to both.
  class Definition
    attr_reader :sections, :templates, :options

    # [options] defaults merged into every section/template; +:align+
    #           defaults to +:right+ unless overridden here.
    def initialize(options={})
      @options = { :align => :right }.merge(options)
      @templates = {}
      @sections = []
    end

    # Creates a section called +name+, yields it for configuration, appends
    # it to +sections+ and returns it.
    #
    # Raises DuplicateSectionNameError when a section of that name exists.
    def section(name, options={}, &block)
      if @sections.any? { |existing| existing.name == name }
        raise DuplicateSectionNameError.new("Duplicate section name: '#{name}'")
      end

      created = FixedWidth::Section.new(name, @options.merge(options))
      created.definition = self
      yield(created)
      @sections.push(created)
      created
    end

    # Creates a section used only as a template, yields it for configuration
    # and stores it under +name+ in +templates+.
    def template(name, options={}, &block)
      blueprint = FixedWidth::Section.new(name, @options.merge(options))
      yield(blueprint)
      @templates[name] = blueprint
    end

    # DSL sugar: any unknown method defines a section named after it.
    def method_missing(method, *args, &block)
      section(method, *args, &block)
    end
  end
end
@@ -0,0 +1,84 @@
1
+ #
2
+ # =DESCRIPTION:
3
+ #
4
+ # A simple, clean DSL for describing, writing, and parsing fixed-width text files.
5
+ #
6
+ # =FEATURES:
7
+ #
8
+ # * Easy DSL syntax
9
+ # * Can parse and format fixed width files
10
+ # * Templated sections for reuse
11
+ #
12
+ # For examples, see examples/*.rb or the README.
13
+ #
14
# Entry point of the library: holds the error classes and the class-level
# API used to register a file definition and to generate, write or parse
# fixed-width data with it.
class FixedWidth
  class ParserError < RuntimeError; end
  class DuplicateColumnNameError < StandardError; end
  class DuplicateGroupNameError < StandardError; end
  class DuplicateSectionNameError < StandardError; end
  class RequiredSectionNotFoundError < StandardError; end
  class RequiredSectionEmptyError < StandardError; end
  class FormattedStringExceedsLengthError < StandardError; end
  class ColumnMismatchError < StandardError; end

  #
  # [name]   a symbol to reference this file definition later
  # [option] a hash of default options for all sub-elements
  # and a block that defines the sections of the file.
  #
  # returns: +Definition+ instance for this file description.
  #
  def self.define(name, options={}) # yields definition
    definition = Definition.new(options)
    yield(definition)
    # Registered only after the block ran, so a failing block leaves no
    # half-built definition behind.
    definitions[name] = definition
    definition
  end

  #
  # [definition_name] symbol +name+ used in +define+
  # [data]            nested hash describing the contents of the sections
  #
  # returns: string of the transformed +data+ (into fixed-width records).
  # raises:  ArgumentError when no definition is registered under
  #          +definition_name+.
  #
  def self.generate(definition_name, data)
    definition = definition(definition_name)
    # The message must use +definition_name+; a bare +name+ here resolves to
    # the class name ("FixedWidth") and hides what was actually requested.
    raise ArgumentError.new("Definition name '#{definition_name}' was not found.") unless definition
    generator = Generator.new(definition)
    generator.generate(data)
  end

  #
  # [file]            IO object to write the +generate+d data
  # [definition_name] symbol +name+ used in +define+
  # [data]            nested hash describing the contents of the sections
  #
  # writes transformed data to +file+ object as fixed-width records.
  #
  def self.write(file, definition_name, data)
    file.write(generate(definition_name, data))
  end

  #
  # [file]            IO object from which to read the fixed-width text records
  # [definition_name] symbol +name+ used in +define+
  #
  # returns: parsed text records in a nested hash.
  # raises:  ArgumentError when no definition is registered under
  #          +definition_name+.
  #
  def self.parse(file, definition_name)
    definition = definition(definition_name)
    raise ArgumentError.new("Definition name '#{definition_name}' was not found.") unless definition
    parser = Parser.new(definition, file)
    parser.parse
  end

  # Internal helpers. NOTE: a bare +private+ does not apply to methods
  # defined with +def self.+, so these have always been publicly callable;
  # that is kept as-is for backward compatibility.

  # Registry of all definitions, keyed by the name given to +define+.
  def self.definitions
    @@definitions ||= {}
  end

  # Looks up one registered definition; +nil+ when unknown.
  def self.definition(name)
    definitions[name]
  end
end
@@ -0,0 +1,20 @@
1
class FixedWidth
  # Turns a nested data hash into fixed-width text, one line per row, walking
  # the sections of a definition in their declared order.
  class Generator

    # [definition] object exposing +sections+; each section must respond to
    #              +name+, +optional+ and +format(row)+.
    def initialize(definition)
      @definition = definition
    end

    # [data] hash keyed by section name; each value is either a single row
    #        or an Array of rows.
    #
    # returns: all formatted rows joined with "\n".
    # raises:  FixedWidth::RequiredSectionEmptyError when a non-optional
    #          section has no content (+nil+ or empty).
    def generate(data)
      @builder = []
      @definition.sections.each do |section|
        content = data[section.name]
        if (content.nil? || content.empty?) && !section.optional
          raise FixedWidth::RequiredSectionEmptyError.new("Required section '#{section.name}' was empty.")
        end
        rows_for(content).each {|row| @builder << section.format(row) }
      end
      @builder.join("\n")
    end

    private

    # Normalises section content to a list of rows. Missing content (+nil+)
    # yields no rows at all: wrapping it as [nil] made optional sections
    # without data crash inside +section.format+.
    def rows_for(content)
      return [] if content.nil?
      content.is_a?(Array) ? content : [content]
    end

  end
end
@@ -0,0 +1,47 @@
1
class FixedWidth
  # Reads every line of a file-like object and distributes the lines over
  # the sections of a definition, in the order the sections were declared.
  class Parser
    # [definition] object exposing +sections+
    # [file]       object responding to +readlines+
    def initialize(definition, file)
      @definition = definition
      @file = file
    end

    # returns: hash keyed by section name. A singular section maps to one
    #          parsed row, any other section to an Array of parsed rows.
    #          An input without any lines yields an empty hash.
    # raises:  FixedWidth::RequiredSectionNotFoundError when a non-optional
    #          section matched no line.
    def parse
      @parsed = {}
      @content = read_file
      return @parsed if @content.empty?

      @definition.sections.each do |section|
        found = fill_content(section)
        next if found > 0 || section.optional
        raise FixedWidth::RequiredSectionNotFoundError.new("Required section '#{section.name}' was not found.")
      end
      @parsed
    end

    private

    # All lines of the file with their line terminators removed.
    def read_file
      @file.readlines.map { |raw| raw.chomp }
    end

    # Consumes lines from the front of the buffer for as long as +section+
    # matches them; returns how many lines were consumed.
    def fill_content(section)
      consumed = 0
      while section.match(@content.first)
        add_to_section(section, @content.first)
        @content.shift
        consumed += 1
      end
      consumed
    end

    # Stores the parsed line: singular sections keep only the latest row,
    # all others accumulate rows in an Array.
    def add_to_section(section, line)
      key = section.name
      if section.singular
        @parsed[key] = section.parse(line)
      else
        (@parsed[key] ||= []) << section.parse(line)
      end
    end
  end
end
@@ -0,0 +1,90 @@
1
class FixedWidth
  # One kind of record in a file: an ordered list of columns plus a "trap"
  # (a callable deciding whether a raw line belongs to this section).
  class Section
    attr_accessor :definition, :optional, :singular
    attr_reader :name, :columns, :options

    # [name]    identifier of the section
    # [options] defaults passed on to every column; +:trap+, +:optional+
    #           and +:singular+ are also read here.
    def initialize(name, options={})
      @name = name
      @columns = []
      @options = options
      @trap = options[:trap]
      @optional = options[:optional] || false
      @singular = options[:singular] || false
    end

    # Appends a column and returns it. Section options are merged under the
    # column's own +options+.
    #
    # Raises DuplicateColumnNameError when the name is already taken within
    # the same group, and DuplicateGroupNameError when a group and an
    # ungrouped column would share a name.
    def column(name, length, options={})
      group = options[:group]

      if column_names_by_group(group).include?(name)
        raise FixedWidth::DuplicateColumnNameError.new("You have already defined a column named '#{name}' in the '#{group.inspect}' group.")
      end
      if column_names_by_group(nil).include?(group)
        raise FixedWidth::DuplicateGroupNameError.new("You have already defined a column named '#{group}'; you cannot have a group and column of the same name.")
      end
      if group_names.include?(name)
        raise FixedWidth::DuplicateGroupNameError.new("You have already defined a group named '#{name}'; you cannot have a group and column of the same name.")
      end

      Column.new(name, length, @options.merge(options)).tap do |created|
        @columns << created
      end
    end

    # Adds a filler column named +:spacer+; +spacer+ overrides the padding.
    def spacer(length, spacer=nil)
      filler_options = spacer ? { :padding => spacer } : {}
      column(:spacer, length, filler_options)
    end

    # Stores the block used by +match+ to recognise lines of this section.
    def trap(&block)
      @trap = block
    end

    # Appends the columns of a template registered on the definition and
    # adopts its options as defaults.
    #
    # Raises ArgumentError when the template is unknown.
    def template(name)
      source = @definition.templates[name]
      raise ArgumentError.new("Template '#{name}' not found as a known template.") unless source
      @columns += source.columns
      # Section options should trump template options
      @options = source.options.merge(@options)
    end

    # Formats +data+ into one line: every column formats its own value,
    # looked up in the group's sub-hash for grouped columns.
    def format(data)
      pieces = @columns.map do |col|
        values = col.group ? data[col.group] : data
        col.format(values[col.name])
      end
      pieces.join
    end

    # Splits +line+ by column widths and returns a hash of parsed values;
    # grouped columns end up in a nested hash under the group name and
    # spacer columns are skipped.
    def parse(line)
      row = {}
      group_names.each { |group| row[group] = {} }

      offset = 0
      @columns.each do |col|
        width = col.length
        if col.name != :spacer
          target = col.group ? row[col.group] : row
          slice = line.mb_chars[offset..offset+width-1] || ''
          target[col.name] = col.parse(slice)
        end
        offset += width
      end

      row
    end

    # False for a +nil+ line, otherwise whatever the trap returns.
    def match(raw_line)
      return false if raw_line.nil?
      @trap.call(raw_line)
    end

    # DSL sugar: any unknown method defines a column named after it.
    def method_missing(method, *args)
      column(method, *args)
    end

    private

    # Names of the columns in +group+ (+nil+ = ungrouped), spacers excluded.
    def column_names_by_group(group)
      members = @columns.select { |col| col.group == group }
      members.map { |col| col.name } - [:spacer]
    end

    # Distinct group names used by the columns defined so far.
    def group_names
      @columns.map { |col| col.group }.compact.uniq
    end
  end
end
@@ -0,0 +1,205 @@
1
+ require File.join(File.dirname(__FILE__), 'spec_helper')
2
+
3
# Behaviour of FixedWidth::Column: defaults, option handling, parsing of raw
# field text and formatting of values to the column width.
#
# Expected strings in the formatting examples are exactly as wide as the
# column under test (5 or 10 characters), because Column#format always pads
# to the full column length.
describe FixedWidth::Column do
  before(:each) do
    @name = :id
    @length = 5
    @column = FixedWidth::Column.new(@name, @length)
  end

  describe "when being created" do
    it "should have a name" do
      @column.name.should == @name
    end

    it "should have a length" do
      @column.length.should == @length
    end

    it "should have a default padding" do
      @column.padding.should == ' '
    end

    it "should have a default alignment" do
      @column.alignment.should == :right
    end

    it "should have a default truncation" do
      @column.truncate.should be_false
    end
  end

  describe "when specifying an alignment" do
    before(:each) do
      @column = FixedWidth::Column.new(@name, @length, :align => :left)
    end

    it "should only accept :right or :left for an alignment" do
      lambda{ FixedWidth::Column.new(@name, @length, :align => :bogus) }.should raise_error(ArgumentError, "Option :align only accepts :right (default) or :left")
    end

    it "should override the default alignment" do
      @column.alignment.should == :left
    end
  end

  describe "when specifying padding" do
    before(:each) do
      @column = FixedWidth::Column.new(@name, @length, :padding => '0')
    end

    it "should override the default padding" do
      @column.padding.should == '0'
    end
  end

  describe "when parsing a value from a file" do
    it "should return nil for blank fields if specified" do
      @column = FixedWidth::Column.new(@name, @length, :padding => '0', :nil_blank => true)
      @column.parse(' name ').should == 'name '
      @column.parse(" \t \n").should == nil
      @column.parse(" 0 \n").should == "0 \n"
      @column.parse('').should == nil
    end

    it "should default to returning formatted strings if nil_blank is not set" do
      @column = FixedWidth::Column.new(@name, @length, :padding => '0', :nil_blank => false)
      @column.parse(' name ').should == 'name '
      @column.parse(" \t \n").should == ""
      @column.parse(" 0 \n").should == "0 \n"
      @column.parse('').should == ""
    end

    # Right-aligned columns are padded on the left, so only leading
    # whitespace is stripped.
    it "should default to a right-aligned string" do
      @column.parse(' name ').should == 'name '
      @column.parse(" \t 234").should == '234'
      @column.parse(" 000000234 \n").should == "000000234 \n"
      @column.parse('12.34').should == '12.34'
    end

    # Left-aligned columns are padded on the right, so only trailing
    # whitespace is stripped.
    it "should default to a left-aligned string if no parser specified, but alignment is left" do
      @column = FixedWidth::Column.new(@name, @length, :align => :left)

      @column.parse(' name ').should == ' name'
      @column.parse(" \t 234").should == " \t 234"
      @column.parse(" 000000234 \n").should == " 000000234"
      @column.parse('12.34').should == '12.34'
    end

    it "should support a symbol as the parser (:to_i)" do
      @column = FixedWidth::Column.new(:amount, 10, :parser => :to_i)
      @column.parse('234 ').should == 234
      @column.parse(' 234').should == 234
      @column.parse('00000234').should == 234
      @column.parse('Ryan ').should == 0
      @column.parse('00023.45').should == 23
    end

    it "should support a symbol as the parser (:to_f)" do
      @column = FixedWidth::Column.new(:amount, 10, :parser => :to_f)
      @column.parse(' 234.45').should == 234.45
      @column.parse('234.5600').should == 234.56
      @column.parse(' 234').should == 234.0
      @column.parse('00000234').should == 234.0
      @column.parse('Ryan ').should == 0
      @column.parse('00023.45').should == 23.45
    end

    it "should support a lambda as the parser (date)" do
      @column = FixedWidth::Column.new(:date, 10, :parser => lambda{|x| Date.strptime(x, "%m%d%Y")})
      dt = @column.parse('08222009')
      dt.should be_a(Date)
      dt.to_s.should == '2009-08-22'
    end
  end

  describe "when applying formatting options" do
    # @length is 5, so every expected string below is 5 characters wide.
    it "should respect a right alignment" do
      @column = FixedWidth::Column.new(@name, @length, :align => :right)
      @column.format(25).should == '   25'
    end

    it "should respect a left alignment" do
      @column = FixedWidth::Column.new(@name, @length, :align => :left)
      @column.format(25).should == '25   '
    end

    it "should respect padding with spaces" do
      @column = FixedWidth::Column.new(@name, @length, :padding => ' ')
      @column.format(25).should == '   25'
    end

    it "should respect padding with zeros" do
      @column = FixedWidth::Column.new(@name, @length, :padding => '0')
      @column.format(25).should == '00025'
    end

    it "should work with non-string inputs and the default formatter" do
      @column = FixedWidth::Column.new(@name, @length)
      @column.format(25).should == '   25'
    end
  end

  describe "when formatting values for a file" do
    # Columns of length 10 below expect 10-character results.
    it "should default to a string" do
      @column = FixedWidth::Column.new(:name, 10)
      @column.format('Bill').should == '      Bill'
    end

    describe "whose size is too long" do
      it "should raise an error if truncate is false" do
        @value = "XX" * @length
        lambda { @column.format(@value) }.should raise_error(
          FixedWidth::FormattedStringExceedsLengthError,
          "The formatted value '#{@value}' in column '#{@name}' exceeds the allowed length of #{@length} chararacters."
        )
      end

      it "should truncate from the left if truncate is true and aligned left" do
        @column = FixedWidth::Column.new(@name, @length, :truncate => true, :align => :left)
        @column.format("This is too long").should == "This "
      end

      it "should truncate from the right if truncate is true and aligned right" do
        @column = FixedWidth::Column.new(@name, @length, :truncate => true, :align => :right)
        @column.format("This is too long").should == " long"
      end
    end

    it "should support a symbol formatter (:to_s)" do
      @column = FixedWidth::Column.new(:amount, 10, :formatter => :to_s)
      @column.format(234).should == '       234'
      @column.format('234').should == '       234'
    end

    it "should support a lambda formatter (.to_f.to_s)" do
      @column = FixedWidth::Column.new(:amount, 10, :formatter => lambda {|x| x.to_f.to_s })
      @column.format(234.45).should == '    234.45'
      @column.format('234.4500').should == '    234.45'
      @column.format('3').should == '       3.0'
    end

    it "should support a lambda formatter (float with sprintf)" do
      @column = FixedWidth::Column.new(:amount, 10, :formatter => lambda {|x| "%.3f" % x.to_f })
      @column.format(234.45).should == '   234.450'
      @column.format('234.4500').should == '   234.450'
      @column.format('3').should == '     3.000'
    end

    it "should support the float type with a format, alignment and padding" do
      @column = FixedWidth::Column.new(:amount, 10, :formatter => lambda {|x| "%.2f" % x.to_f }, :align => :left, :padding => '0')
      @column.format(234.45).should == '234.450000'
      @column = FixedWidth::Column.new(:amount, 10, :formatter => lambda {|x| "%.2f" % x.to_f }, :align => :right, :padding => '0')
      @column.format('234.400').should == '0000234.40'
      @column = FixedWidth::Column.new(:amount, 10, :formatter => lambda {|x| "%.4f" % x.to_f }, :align => :left, :padding => ' ')
      @column.format('3').should == '3.0000    '
    end

    it "should support the date type with a :format" do
      dt = Date.new(2009, 8, 22)
      @column = FixedWidth::Column.new(:date, 8, :formatter => lambda {|x| x.strftime("%m%d%Y") } )
      @column.format(dt).should == '08222009'
    end
  end

end