fixed_width-multibyte 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,14 @@
1
+ $: << File.dirname(__FILE__)
2
+ require 'ostruct'
3
+
4
+ require 'active_support'
5
+ require 'active_support/version'
6
+ require 'active_support/multibyte' if ::ActiveSupport::VERSION::MAJOR >= 3
7
+
8
+ require 'fixed_width/core_ext/symbol'
9
+ require 'fixed_width/fixed_width'
10
+ require 'fixed_width/definition'
11
+ require 'fixed_width/section'
12
+ require 'fixed_width/column'
13
+ require 'fixed_width/parser'
14
+ require 'fixed_width/generator'
@@ -0,0 +1,86 @@
1
class FixedWidth
  # Describes a single fixed-width field: its name, its width, how it is
  # aligned and padded, and the callables used to convert values when reading
  # (+parse+) and writing (+format+).
  class Column
    DEFAULT_PADDING = ' '
    DEFAULT_ALIGNMENT = :right
    DEFAULT_TRUNCATE = false
    DEFAULT_FORMATTER = :to_s

    # Matches a value that is empty or consists only of whitespace.
    # Anchored with \A and \z so the *whole* value is tested; the line anchors
    # ^ and $ would also match a single empty line inside a multi-line value.
    BLANK_REGEX = /\A\s*\z/

    attr_reader :name, :length, :alignment, :padding, :truncate, :group

    #
    # [name]    identifier used as the hash key for this column's value
    # [length]  width of the column in characters
    # [options] hash of settings:
    #           :align     - :right (default) or :left
    #           :padding   - pad string, defaults to a single space
    #           :truncate  - when true, over-long formatted values are cut to
    #                        +length+ instead of raising
    #           :group     - optional group name, exposed via +group+
    #           :parser    - Symbol or callable applied to raw text in +parse+
    #           :formatter - Symbol or callable applied to values in +format+
    #                        (defaults to :to_s)
    #           :nil_blank - when true, blank raw text parses to nil
    #
    # raises: ArgumentError when :align is neither :left nor :right.
    #
    def initialize(name, length, options={})
      assert_valid_options(options)
      @name = name
      @length = length
      @alignment = options[:align] || DEFAULT_ALIGNMENT
      @padding = options[:padding] || DEFAULT_PADDING
      @truncate = options[:truncate] || DEFAULT_TRUNCATE

      @group = options[:group]

      # Symbols are converted to procs so both forms can be invoked with #call.
      @parser = options[:parser]
      @parser = @parser.to_proc if @parser.is_a?(Symbol)

      @formatter = options[:formatter]
      @formatter ||= DEFAULT_FORMATTER
      @formatter = @formatter.to_proc if @formatter.is_a?(Symbol)

      @nil_blank = options[:nil_blank]
    end

    #
    # Converts the raw text of this column into a value.
    #
    # Returns nil for blank text when :nil_blank is set; otherwise the result
    # of the custom parser if one was given; otherwise the text with the
    # padding side stripped (leading whitespace for right-aligned columns,
    # trailing whitespace for left-aligned ones).
    #
    # raises: ParserError wrapping any StandardError raised while parsing.
    #
    def parse(value)
      if @nil_blank && blank?(value)
        return nil
      elsif @parser
        @parser.call(value)
      else
        case @alignment
        when :right
          value.lstrip
        when :left
          value.rstrip
        end
      end
    rescue
      raise ParserError.new("The value '#{value}' could not be parsed: #{$!}")
    end

    #
    # Converts +value+ to text with the formatter, enforces the column width
    # (truncating or raising, see +validate_size+) and pads it to +length+.
    #
    def format(value)
      pad(
        validate_size(
          @formatter.call(value)
        )
      )
    end

    private

    # Truthy when +value+ is empty or whitespace-only.
    def blank?(value)
      value =~ BLANK_REGEX
    end

    # Pads on the side opposite to the alignment so the text ends up
    # exactly +length+ characters wide.
    def pad(value)
      case @alignment
      when :left
        value.ljust(@length, @padding)
      when :right
        value.rjust(@length, @padding)
      end
    end

    # Rejects any :align value other than nil, :left or :right.
    def assert_valid_options(options)
      unless options[:align].nil? || [:left, :right].include?(options[:align])
        raise ArgumentError.new("Option :align only accepts :right (default) or :left")
      end
    end

    # Returns +result+ unchanged when it fits. When it is too long, raises
    # FormattedStringExceedsLengthError unless :truncate is set, in which case
    # right-aligned columns keep the last +length+ characters and left-aligned
    # columns keep the first +length+ characters.
    def validate_size(result)
      return result if result.length <= @length
      # NOTE: the spelling "chararacters" is kept as-is; the column spec
      # asserts on this exact message text.
      raise FixedWidth::FormattedStringExceedsLengthError.new(
        "The formatted value '#{result}' in column '#{@name}' exceeds the allowed length of #{@length} chararacters.") unless @truncate
      case @alignment
      when :right then result[-@length,@length]
      when :left then result[0,@length]
      end
    end
  end
end
@@ -0,0 +1,17 @@
1
+ #
2
+ # Taken from ActiveSupport 2.3.5 lib/active_support/core_ext/symbol.rb
3
+ #
4
# Backport of Symbol#to_proc, installed only when the running interpreter
# does not already provide it.
unless :to_proc.respond_to?(:to_proc)
  class Symbol
    # Builds a proc that sends this symbol, as a method name, to the first
    # argument it receives and forwards any remaining arguments. Handy with
    # enumerations:
    #
    #   people.collect(&:name)      # same as people.collect { |p| p.name }
    #   people.select(&:manager?)   # same as people.select { |p| p.manager? }
    def to_proc
      Proc.new do |*args|
        receiver = args.shift
        receiver.__send__(self, *args)
      end
    end
  end
end
@@ -0,0 +1,31 @@
1
class FixedWidth
  # Holds the ordered sections and the reusable templates that make up one
  # fixed-width file description, plus the default options they inherit.
  class Definition
    attr_reader :sections, :templates, :options

    #
    # [options] default options merged into every section and template;
    #           :align defaults to :right.
    #
    def initialize(options={})
      @sections = []
      @templates = {}
      @options = { :align => :right }.merge(options)
    end

    #
    # Declares a section, yields it to the block for column definitions and
    # appends it to +sections+.
    #
    # [name]    section name, must be unique within this definition
    # [options] overrides for the definition-wide default options
    #
    # returns: the new Section.
    # raises:  DuplicateSectionNameError when +name+ is already in use.
    #
    def section(name, options={}, &block)
      raise DuplicateSectionNameError.new("Duplicate section name: '#{name}'") if @sections.detect{|s| s.name == name }

      section = FixedWidth::Section.new(name, @options.merge(options))
      section.definition = self
      yield(section)
      @sections << section
      section
    end

    #
    # Declares a reusable template section, yields it to the block and stores
    # it in +templates+ under +name+.
    #
    # returns: the new template Section.
    #
    def template(name, options={}, &block)
      section = FixedWidth::Section.new(name, @options.merge(options))
      # Section#template looks templates up through its definition, so a
      # template needs the back-reference too in order to include another one.
      section.definition = self
      yield(section)
      @templates[name] = section
    end

    #
    # DSL sugar: <tt>definition.header { |s| ... }</tt> is the same as
    # <tt>definition.section(:header) { |s| ... }</tt>.
    #
    # Only calls that carry a block are treated as section declarations; any
    # other unknown message falls through to the default handling and raises
    # NoMethodError instead of failing later inside +section+.
    #
    def method_missing(method, *args, &block)
      return super unless block_given?
      section(method, *args, &block)
    end
  end
end
@@ -0,0 +1,84 @@
1
+ #
2
+ # =DESCRIPTION:
3
+ #
4
+ # A simple, clean DSL for describing, writing, and parsing fixed-width text files.
5
+ #
6
+ # =FEATURES:
7
+ #
8
+ # * Easy DSL syntax
9
+ # * Can parse and format fixed width files
10
+ # * Templated sections for reuse
11
+ #
12
+ # For examples, see examples/*.rb or the README.
13
+ #
14
class FixedWidth
  class ParserError < RuntimeError; end
  class DuplicateColumnNameError < StandardError; end
  class DuplicateGroupNameError < StandardError; end
  class DuplicateSectionNameError < StandardError; end
  class RequiredSectionNotFoundError < StandardError; end
  class RequiredSectionEmptyError < StandardError; end
  class FormattedStringExceedsLengthError < StandardError; end
  class ColumnMismatchError < StandardError; end

  #
  # Registers a file definition under +name+.
  #
  # [name]    a symbol to reference this file definition later
  # [options] a hash of default options for all sub-elements
  #
  # Yields the new +Definition+ so the block can declare the sections of
  # the file.
  #
  # returns: +Definition+ instance for this file description.
  #
  def self.define(name, options={}) # yields definition
    definition = Definition.new(options)
    yield(definition)
    definitions[name] = definition
    definition
  end

  #
  # [definition_name] symbol +name+ used in +define+
  # [data]            nested hash describing the contents of the sections
  #
  # returns: string of the transformed +data+ (into fixed-width records).
  # raises:  ArgumentError when no definition is registered under
  #          +definition_name+.
  #
  def self.generate(definition_name, data)
    found = definition(definition_name)
    # Interpolate definition_name here: a bare +name+ inside a class method
    # is the class's own name, not the requested definition.
    raise ArgumentError.new("Definition name '#{definition_name}' was not found.") unless found
    generator = Generator.new(found)
    generator.generate(data)
  end

  #
  # [file]            IO object to write the +generate+d data
  # [definition_name] symbol +name+ used in +define+
  # [data]            nested hash describing the contents of the sections
  #
  # writes transformed data to +file+ object as fixed-width records.
  #
  def self.write(file, definition_name, data)
    file.write(generate(definition_name, data))
  end

  #
  # [file]            IO object from which to read the fixed-width text records
  # [definition_name] symbol +name+ used in +define+
  #
  # returns: parsed text records in a nested hash.
  # raises:  ArgumentError when no definition is registered under
  #          +definition_name+.
  #
  def self.parse(file, definition_name)
    found = definition(definition_name)
    raise ArgumentError.new("Definition name '#{definition_name}' was not found.") unless found
    parser = Parser.new(found, file)
    parser.parse
  end

  # The two lookup helpers below are internal, but they are (and always were)
  # publicly callable: a bare +private+ does not apply to methods defined
  # with +def self.+, so none is used here.

  # Registry of every definition created through +define+, keyed by name.
  def self.definitions
    @@definitions ||= {}
  end

  # Looks up a registered definition; nil when +name+ is unknown.
  def self.definition(name)
    definitions[name]
  end
end
@@ -0,0 +1,20 @@
1
class FixedWidth
  # Turns a nested data hash into fixed-width text by formatting each row
  # with the matching section of a definition.
  class Generator

    # [definition] object whose +sections+ describe the records to emit
    def initialize(definition)
      @definition = definition
    end

    #
    # [data] hash keyed by section name; each value is either one row or an
    #        Array of rows for that section
    #
    # Sections are emitted in definition order, one line per row.
    #
    # returns: the formatted rows joined with "\n".
    # raises:  FixedWidth::RequiredSectionEmptyError when a non-optional
    #          section has nil or empty content.
    #
    def generate(data)
      @builder = []
      @definition.sections.each do |section|
        content = data[section.name]
        if content.nil? || content.empty?
          raise FixedWidth::RequiredSectionEmptyError.new("Required section '#{section.name}' was empty.") unless section.optional
          # An optional section with no data at all produces no rows; wrapping
          # nil in an array would hand nil to section.format.
          next if content.nil?
        end
        arrayed_content = content.is_a?(Array) ? content : [content]
        arrayed_content.each {|row| @builder << section.format(row) }
      end
      @builder.join("\n")
    end

  end
end
@@ -0,0 +1,47 @@
1
class FixedWidth
  # Reads fixed-width text from an IO-like object and sorts its lines into
  # the sections of a definition, in definition order.
  class Parser
    # [definition] object whose +sections+ describe the expected records
    # [file]       object responding to +readlines+
    def initialize(definition, file)
      @definition = definition
      @file = file
    end

    #
    # Consumes the file line by line. Each section, in turn, takes every
    # consecutive leading line it matches.
    #
    # returns: hash keyed by section name; a single parsed row for singular
    #          sections, an Array of parsed rows otherwise. Empty input
    #          yields an empty hash.
    # raises:  FixedWidth::RequiredSectionNotFoundError when a non-optional
    #          section matched no line of non-empty input.
    #
    def parse
      @parsed = {}
      @content = read_file
      return @parsed if @content.empty?

      @definition.sections.each do |section|
        found = fill_content(section)
        next if found > 0 || section.optional
        raise FixedWidth::RequiredSectionNotFoundError.new("Required section '#{section.name}' was not found.")
      end
      @parsed
    end

    private

    # All lines of the file with their line terminators removed.
    def read_file
      @file.readlines.map { |raw| raw.chomp }
    end

    # Moves lines from the front of the remaining content into +section+ for
    # as long as the section matches them; returns how many were taken.
    def fill_content(section)
      taken = 0
      while section.match(@content.first)
        add_to_section(section, @content.first)
        taken += 1
        @content.shift
      end
      taken
    end

    # Stores the parsed line: singular sections keep only the latest row,
    # other sections accumulate rows in an Array.
    def add_to_section(section, line)
      if section.singular
        @parsed[section.name] = section.parse(line)
      else
        rows = (@parsed[section.name] ||= [])
        rows << section.parse(line)
      end
    end
  end
end
@@ -0,0 +1,90 @@
1
class FixedWidth
  # One kind of record in a fixed-width file: an ordered list of columns plus
  # the "trap" callable that decides whether a raw line belongs to it.
  class Section
    attr_accessor :definition, :optional, :singular
    attr_reader :name, :columns, :options

    #
    # [name]    section name
    # [options] defaults passed on to every column of this section; the keys
    #           :trap, :optional and :singular are also read here
    #
    def initialize(name, options={})
      @name     = name
      @options  = options
      @columns  = []
      @trap     = options[:trap]
      @optional = options[:optional] || false
      @singular = options[:singular] || false
    end

    #
    # Appends a column to the section.
    #
    # returns: the new Column.
    # raises:  FixedWidth::DuplicateColumnNameError when the name is already
    #          used within the same group;
    #          FixedWidth::DuplicateGroupNameError when a group and an
    #          ungrouped column would share a name.
    #
    def column(name, length, options={})
      group = options[:group]

      if column_names_by_group(group).include?(name)
        raise FixedWidth::DuplicateColumnNameError.new("You have already defined a column named '#{name}' in the '#{group.inspect}' group.")
      end
      if column_names_by_group(nil).include?(group)
        raise FixedWidth::DuplicateGroupNameError.new("You have already defined a column named '#{group}'; you cannot have a group and column of the same name.")
      end
      if group_names.include?(name)
        raise FixedWidth::DuplicateGroupNameError.new("You have already defined a group named '#{name}'; you cannot have a group and column of the same name.")
      end

      added = Column.new(name, length, @options.merge(options))
      @columns.push(added)
      added
    end

    # Adds a filler column named :spacer of the given width; +spacer+, when
    # given, is used as its padding.
    def spacer(length, spacer=nil)
      column(:spacer, length, spacer ? { :padding => spacer } : {})
    end

    # Registers the block used by +match+ to recognise this section's lines.
    def trap(&block)
      @trap = block
    end

    #
    # Appends the columns of the named template from the owning definition.
    #
    # raises: ArgumentError when the definition has no such template.
    #
    def template(name)
      source = @definition.templates[name]
      raise ArgumentError.new("Template '#{name}' not found as a known template.") unless source
      @columns = @columns + source.columns
      # Merging in this direction lets the section's own options win over
      # the template's.
      @options = source.options.merge(@options)
    end

    # Formats one row: each column formats its value (looked up in the
    # column's group hash when it has a group) and the pieces are joined.
    def format(data)
      cells = @columns.map do |col|
        source = col.group ? data[col.group] : data
        col.format(source[col.name])
      end
      cells.join
    end

    # Splits +line+ into consecutive column-width slices and returns a hash of
    # parsed values; grouped columns go into a nested hash per group and
    # :spacer columns are skipped.
    def parse(line)
      row = {}
      group_names.each { |group| row[group] = {} }

      offset = 0
      @columns.each do |col|
        if col.name != :spacer
          target = col.group ? row[col.group] : row
          slice = line.mb_chars[offset..offset + col.length - 1] || ''
          target[col.name] = col.parse(slice)
        end
        offset += col.length
      end

      row
    end

    # False for nil; otherwise whatever the trap returns for the line.
    def match(raw_line)
      return false if raw_line.nil?
      @trap.call(raw_line)
    end

    # DSL sugar: <tt>section.first_name(10)</tt> is the same as
    # <tt>section.column(:first_name, 10)</tt>.
    def method_missing(method, *args)
      column(method, *args)
    end

    private

    # Names of the non-spacer columns that belong to +group+.
    def column_names_by_group(group)
      members = @columns.select { |col| col.group == group }
      members.map { |col| col.name } - [:spacer]
    end

    # Distinct group names used by the columns so far.
    def group_names
      @columns.map { |col| col.group }.compact.uniq
    end
  end
end
@@ -0,0 +1,205 @@
1
require File.join(File.dirname(__FILE__), 'spec_helper')
require 'date' # Date is used by the date parser/formatter examples below.

# Specification for FixedWidth::Column: construction defaults, option
# handling, parsing of raw field text and formatting of values.
#
# Expected strings in the formatting examples are written out to the full
# column width, because Column#format always pads its result to +length+.
describe FixedWidth::Column do
  before(:each) do
    @name = :id
    @length = 5
    @column = FixedWidth::Column.new(@name, @length)
  end

  describe "when being created" do
    it "should have a name" do
      @column.name.should == @name
    end

    it "should have a length" do
      @column.length.should == @length
    end

    it "should have a default padding" do
      @column.padding.should == ' '
    end

    it "should have a default alignment" do
      @column.alignment.should == :right
    end

    it "should have a default truncation" do
      @column.truncate.should be_false
    end
  end

  describe "when specifying an alignment" do
    before(:each) do
      @column = FixedWidth::Column.new(@name, @length, :align => :left)
    end

    it "should only accept :right or :left for an alignment" do
      lambda{ FixedWidth::Column.new(@name, @length, :align => :bogus) }.should raise_error(ArgumentError, "Option :align only accepts :right (default) or :left")
    end

    it "should override the default alignment" do
      @column.alignment.should == :left
    end
  end

  describe "when specifying padding" do
    before(:each) do
      @column = FixedWidth::Column.new(@name, @length, :padding => '0')
    end

    it "should override the default padding" do
      @column.padding.should == '0'
    end
  end

  describe "when parsing a value from a file" do
    it "should return nil for blank fields if specified" do
      @column = FixedWidth::Column.new(@name, @length, :padding => '0', :nil_blank => true)
      @column.parse(' name ').should == 'name '
      @column.parse(" \t \n").should == nil
      @column.parse(" 0 \n").should == "0 \n"
      @column.parse('').should == nil
    end

    it "should default to returning formatted strings if nil_blank is not set" do
      @column = FixedWidth::Column.new(@name, @length, :padding => '0', :nil_blank => false)
      @column.parse(' name ').should == 'name '
      @column.parse(" \t \n").should == ""
      @column.parse(" 0 \n").should == "0 \n"
      @column.parse('').should == ""
    end

    it "should default to a right-aligned string" do
      @column.parse(' name ').should == 'name '
      @column.parse(" \t 234").should == '234'
      @column.parse(" 000000234 \n").should == "000000234 \n"
      @column.parse('12.34').should == '12.34'
    end

    it "should default to a left-aligned string if no parser specified, but alignment is left" do
      @column = FixedWidth::Column.new(@name, @length, :align => :left)

      @column.parse(' name ').should == ' name'
      @column.parse(" \t 234").should == " \t 234"
      @column.parse(" 000000234 \n").should == " 000000234"
      @column.parse('12.34').should == '12.34'
    end

    it "should support a symbol as the parser (:to_i)" do
      @column = FixedWidth::Column.new(:amount, 10, :parser => :to_i)
      @column.parse('234 ').should == 234
      @column.parse(' 234').should == 234
      @column.parse('00000234').should == 234
      @column.parse('Ryan ').should == 0
      @column.parse('00023.45').should == 23
    end

    it "should support a symbol as the parser (:to_f)" do
      @column = FixedWidth::Column.new(:amount, 10, :parser => :to_f)
      @column.parse(' 234.45').should == 234.45
      @column.parse('234.5600').should == 234.56
      @column.parse(' 234').should == 234.0
      @column.parse('00000234').should == 234.0
      @column.parse('Ryan ').should == 0
      @column.parse('00023.45').should == 23.45
    end

    it "should support a lambda as the parser (date)" do
      @column = FixedWidth::Column.new(:date, 10, :parser => lambda{|x| Date.strptime(x, "%m%d%Y")})
      dt = @column.parse('08222009')
      dt.should be_a(Date)
      dt.to_s.should == '2009-08-22'
    end
  end

  describe "when applying formatting options" do
    # @length is 5, so every expected string below is 5 characters wide.
    it "should respect a right alignment" do
      @column = FixedWidth::Column.new(@name, @length, :align => :right)
      @column.format(25).should == '   25'
    end

    it "should respect a left alignment" do
      @column = FixedWidth::Column.new(@name, @length, :align => :left)
      @column.format(25).should == '25   '
    end

    it "should respect padding with spaces" do
      @column = FixedWidth::Column.new(@name, @length, :padding => ' ')
      @column.format(25).should == '   25'
    end

    it "should respect padding with zeros" do
      @column = FixedWidth::Column.new(@name, @length, :padding => '0')
      @column.format(25).should == '00025'
    end

    it "should work with non-string inputs and the default formatter" do
      @column = FixedWidth::Column.new(@name, @length)
      @column.format(25).should == '   25'
    end
  end

  describe "when formatting values for a file" do
    it "should default to a string" do
      @column = FixedWidth::Column.new(:name, 10)
      @column.format('Bill').should == '      Bill'
    end

    describe "whose size is too long" do
      it "should raise an error if truncate is false" do
        @value = "XX" * @length
        # The message text (including its spelling) must match Column exactly.
        lambda { @column.format(@value) }.should raise_error(
          FixedWidth::FormattedStringExceedsLengthError,
          "The formatted value '#{@value}' in column '#{@name}' exceeds the allowed length of #{@length} chararacters."
        )
      end

      it "should truncate from the left if truncate is true and aligned left" do
        @column = FixedWidth::Column.new(@name, @length, :truncate => true, :align => :left)
        @column.format("This is too long").should == "This "
      end

      it "should truncate from the right if truncate is true and aligned right" do
        @column = FixedWidth::Column.new(@name, @length, :truncate => true, :align => :right)
        @column.format("This is too long").should == " long"
      end
    end

    # The columns below are 10 characters wide and right-aligned by default.
    it "should support a symbol formatter (:to_s)" do
      @column = FixedWidth::Column.new(:amount, 10, :formatter => :to_s)
      @column.format(234).should == '       234'
      @column.format('234').should == '       234'
    end

    it "should support a lambda formatter (.to_f.to_s)" do
      @column = FixedWidth::Column.new(:amount, 10, :formatter => lambda {|x| x.to_f.to_s })
      @column.format(234.45).should == '    234.45'
      @column.format('234.4500').should == '    234.45'
      @column.format('3').should == '       3.0'
    end

    it "should support a lambda formatter (float with sprintf)" do
      @column = FixedWidth::Column.new(:amount, 10, :formatter => lambda {|x| "%.3f" % x.to_f })
      @column.format(234.45).should == '   234.450'
      @column.format('234.4500').should == '   234.450'
      @column.format('3').should == '     3.000'
    end

    it "should support the float type with a format, alignment and padding" do
      @column = FixedWidth::Column.new(:amount, 10, :formatter => lambda {|x| "%.2f" % x.to_f }, :align => :left, :padding => '0')
      @column.format(234.45).should == '234.450000'
      @column = FixedWidth::Column.new(:amount, 10, :formatter => lambda {|x| "%.2f" % x.to_f }, :align => :right, :padding => '0')
      @column.format('234.400').should == '0000234.40'
      @column = FixedWidth::Column.new(:amount, 10, :formatter => lambda {|x| "%.4f" % x.to_f }, :align => :left, :padding => ' ')
      @column.format('3').should == '3.0000    '
    end

    it "should support the date type with a :format" do
      dt = Date.new(2009, 8, 22)
      @column = FixedWidth::Column.new(:date, 8, :formatter => lambda {|x| x.strftime("%m%d%Y") } )
      @column.format(dt).should == '08222009'
    end
  end

end