ascii-data-tools 0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +3 -0
- data/.rvmrc +1 -0
- data/.travis.yml +4 -0
- data/Gemfile +3 -0
- data/Gemfile.lock +40 -0
- data/LICENSE.GPL2 +339 -0
- data/README.rdoc +52 -0
- data/Rakefile +42 -0
- data/TODO +4 -0
- data/ascii-data-tools.gemspec +30 -0
- data/bin/ascii-data-cat +13 -0
- data/bin/ascii-data-edit +13 -0
- data/bin/ascii-data-norm +13 -0
- data/bin/ascii-data-qdiff +13 -0
- data/bin/ascii-data-tools-config +9 -0
- data/examples/big +10000 -0
- data/examples/built_in_records.gz +0 -0
- data/examples/slightly_modified_built_in_records.gz +0 -0
- data/features/ascii-data-cat.feature +110 -0
- data/features/ascii-data-edit.feature +91 -0
- data/features/ascii-data-qdiff.feature +54 -0
- data/features/encoding_decoding.feature +68 -0
- data/features/normaliser.feature +27 -0
- data/features/plugins.feature +73 -0
- data/features/record_recognition.feature +61 -0
- data/features/step_definitions/ascii-data-cat_steps.rb +48 -0
- data/features/step_definitions/ascii-data-edit_steps.rb +38 -0
- data/features/step_definitions/ascii-data-norm_steps.rb +7 -0
- data/features/step_definitions/ascii-data-qdiff_steps.rb +43 -0
- data/features/step_definitions/encoding_decoding_steps.rb +23 -0
- data/features/step_definitions/plugins_steps.rb +11 -0
- data/features/step_definitions/record_recognition_steps.rb +10 -0
- data/features/support/env.rb +5 -0
- data/lib/ascii-data-tools.rb +8 -0
- data/lib/ascii-data-tools/configuration.rb +169 -0
- data/lib/ascii-data-tools/configuration_printer.rb +38 -0
- data/lib/ascii-data-tools/controller.rb +123 -0
- data/lib/ascii-data-tools/discover.rb +19 -0
- data/lib/ascii-data-tools/external_programs.rb +23 -0
- data/lib/ascii-data-tools/filter.rb +148 -0
- data/lib/ascii-data-tools/filter/diffing.rb +139 -0
- data/lib/ascii-data-tools/formatting.rb +109 -0
- data/lib/ascii-data-tools/global_autodiscovery.rb +21 -0
- data/lib/ascii-data-tools/record.rb +50 -0
- data/lib/ascii-data-tools/record_type.rb +139 -0
- data/lib/ascii-data-tools/record_type/builder.rb +50 -0
- data/lib/ascii-data-tools/record_type/decoder.rb +77 -0
- data/lib/ascii-data-tools/record_type/encoder.rb +17 -0
- data/lib/ascii-data-tools/record_type/field.rb +168 -0
- data/lib/ascii-data-tools/record_type/normaliser.rb +38 -0
- data/lib/ascii-data-tools/ruby_extensions.rb +7 -0
- data/lib/ascii-data-tools/version.rb +3 -0
- data/spec/ascii-data-tools/configuration_printer_spec.rb +51 -0
- data/spec/ascii-data-tools/configuration_spec.rb +153 -0
- data/spec/ascii-data-tools/discover_spec.rb +8 -0
- data/spec/ascii-data-tools/filter/diffing_spec.rb +82 -0
- data/spec/ascii-data-tools/filter_spec.rb +107 -0
- data/spec/ascii-data-tools/formatting_spec.rb +106 -0
- data/spec/ascii-data-tools/record_spec.rb +49 -0
- data/spec/ascii-data-tools/record_type/builder_spec.rb +69 -0
- data/spec/ascii-data-tools/record_type/decoder_spec.rb +73 -0
- data/spec/ascii-data-tools/record_type/encoder_spec.rb +32 -0
- data/spec/ascii-data-tools/record_type/field_spec.rb +160 -0
- data/spec/ascii-data-tools/record_type/normaliser_spec.rb +25 -0
- data/spec/ascii-data-tools/record_type_spec.rb +175 -0
- data/spec/filter_helper.rb +24 -0
- data/spec/record_type_helpers.rb +8 -0
- data/spec/spec.opts +2 -0
- data/spec/spec_helper.rb +5 -0
- metadata +196 -0
@@ -0,0 +1,19 @@
|
|
1
|
+
# Declares two example record types through the AsciiDataTools configuration
# DSL. Every field is declared with a :length; the leading RECORD_TYPE field of
# each type is additionally constrained to the literal name of that type.
AsciiDataTools.configure do
  record_type("EXAMPLE01") do
    field("RECORD_TYPE",      :length => 9, :constrained_to => "EXAMPLE01")
    field("USAGE",            :length => 3)
    field("A_NUMBER",         :length => 16)
    field("B_NUMBER",         :length => 16)
    field("CHARGEABLE_UNITS", :length => 4)
    field("END_OF_RECORD",    :length => 1)
  end

  record_type("EXAMPLE02") do
    field("RECORD_TYPE",      :length => 9, :constrained_to => "EXAMPLE02")
    field("APN",              :length => 12)
    # TIMESTAMP is the only field flagged :normalised.
    field("TIMESTAMP",        :length => 14, :normalised => true)
    field("SESSION_DURATION", :length => 4)
    field("CHARGEABLE_UNITS", :length => 4)
    field("END_OF_RECORD",    :length => 1)
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'shellwords'

module AsciiDataTools
  # Wrappers around the external command-line programs (touch, diff, sort,
  # vimdiff) used by the tools. All commands are run through a shell, so every
  # path is shell-escaped before being interpolated into the command line.
  module ExternalPrograms
    # Sets the access and modification time of the file at +self.path+ by
    # running +touch+.
    #
    # new_mtime - object responding to strftime (e.g. a Time).
    #
    # Returns the result of Kernel.system.
    def modify_file_mtime_to(new_mtime)
      timestamp = new_mtime.strftime('%Y%m%d%H%M.%S')
      Kernel.system("touch -am -t #{timestamp} #{shell_escape(self.path)}")
    end

    # Starts +diff+ on the given files and returns the IO returned by
    # IO.popen for the running command.
    #
    # files - collection of objects responding to +path+.
    def diff(files)
      IO.popen(diff_command_for(files))
    end

    # Builds the shell command line used by #diff.
    #
    # files - collection of objects responding to +path+.
    #
    # Returns a String such as "diff /tmp/a /tmp/b".
    def diff_command_for(files)
      "diff " + files.collect { |file| shell_escape(file.path) }.join(' ')
    end

    # Sorts the contents of +input_file+ into +output_file+ using +sort+.
    # Both arguments must respond to +path+.
    #
    # Returns the result of Kernel.system.
    def sort(input_file, output_file)
      Kernel.system("sort #{shell_escape(input_file.path)} > #{shell_escape(output_file.path)}")
    end

    # Opens the given filenames side by side in +vimdiff+.
    #
    # filenames - collection of path strings.
    #
    # Returns the result of Kernel.system.
    def edit_differences(filenames)
      escaped = filenames.collect { |filename| shell_escape(filename) }
      Kernel.system("vimdiff #{escaped.join(' ')}")
    end

    private

    # Escapes a path for safe use as a single shell word. Paths made only of
    # ordinary characters are returned unchanged.
    def shell_escape(path)
      Shellwords.escape(path.to_s)
    end
  end
end
|
@@ -0,0 +1,148 @@
|
|
1
|
+
require 'tempfile'
|
2
|
+
require 'ascii-data-tools/external_programs'
|
3
|
+
|
4
|
+
module AsciiDataTools
|
5
|
+
module Filter
|
6
|
+
# One stage of a pull-based record pipeline. A filter reads a record from its
# upstream (any object responding to +read+ and +has_records?+), transforms it
# and hands the result to whoever called #read.
class Filter
  # block - called with each record read from upstream; its return value is
  #         the filtered record.
  def initialize(&block)
    @filter_block = block
  end

  # Connects +upstream_filter+ as the source of records.
  # Returns self so that stages can be chained.
  def <<(upstream_filter)
    @upstream = upstream_filter
    self
  end

  # Pulls one record from upstream and returns it after filtering.
  def read
    incoming = upstream.read
    filter(incoming)
  end

  # Appends every remaining filtered record to +output_stream+ using <<.
  def write(output_stream)
    while has_records?
      output_stream << read
    end
  end

  # True while upstream reports more records.
  def has_records?
    upstream.has_records?
  end

  protected

  # Applies the block given at construction time to +record+.
  def filter(record)
    @filter_block.call(record)
  end

  # The upstream source connected with <<.
  attr_reader :upstream
end
|
37
|
+
|
38
|
+
# A filter that, on first use, drains its whole upstream into a temporary
# file, hands that file to #filter_all, and from then on serves records from
# an InputSource wrapped around whatever #filter_all returned.
class BufferingFilter < Filter
  # block - called once with the buffered tempfile; must return the stream
  #         that records are subsequently read from. Subclasses may override
  #         #filter_all instead of passing a block.
  def initialize(&block)
    @first_time = true
    @filter_all_block = block
  end

  protected

  # Processes the fully buffered input and returns the resulting stream.
  def filter_all(tempfile)
    @filter_all_block[tempfile]
  end

  # Individual records pass through unchanged; the work happens in #filter_all.
  def filter(record)
    record
  end

  # Lazily replaces the original upstream with the buffered and processed
  # stream the first time it is needed.
  def upstream
    if @first_time
      tempfile = buffer_as_tempfile(@upstream)
      @upstream = InputSource.new(nil, filter_all(tempfile))
      @first_time = false
    end
    @upstream
  end

  # Writes every remaining record of +stream+ into a new Tempfile and reopens
  # it so that it can be read from the beginning.
  #
  # Returns the result of Tempfile#open, exactly as before.
  def buffer_as_tempfile(stream)
    tempfile = Tempfile.new("filter")
    tempfile << stream.read while stream.has_records?
    # Keep the Tempfile object itself referenced for the lifetime of this
    # filter. Tempfile deletes its file once the Tempfile object is garbage
    # collected, and callers such as the sort/diff wrappers access the
    # buffered data by path, so the file must not disappear underneath them.
    (@buffered_tempfiles ||= []) << tempfile
    tempfile.open
  end
end
|
68
|
+
|
69
|
+
# Turns raw ascii records into their formatted, human-readable representation.
class FormattingFilter < Filter
  # filename        - name passed along with every record to the type
  #                   determiner.
  # type_determiner - object responding to determine_type_for(encoded_record).
  def initialize(filename, type_determiner)
    @filename = filename
    @type_determiner = type_determiner
    @formatter = Formatting::Formatter.new
  end

  # Determines the type of +record+, decodes it with that type and returns
  # the formatter's output for the decoded record.
  def filter(record)
    encoded = {:ascii_string => record, :filename => @filename}
    record_type = @type_determiner.determine_type_for(encoded)
    @formatter.format(record_type.decode(encoded))
  end
end
|
83
|
+
|
84
|
+
# Replaces each raw record with the normalised form produced by its type.
class NormalisingFilter < Filter
  # filename        - name passed along with every record to the type
  #                   determiner.
  # type_determiner - object responding to determine_type_for(encoded_record).
  def initialize(filename, type_determiner)
    @filename, @type_determiner = filename, type_determiner
  end

  # Looks up the type of +record+ and returns type.normalise(record).
  def filter(record)
    encoded = {:ascii_string => record, :filename => @filename}
    @type_determiner.determine_type_for(encoded).normalise(record)
  end
end
|
95
|
+
|
96
|
+
# Buffers all upstream records and serves them back in the order produced by
# the external +sort+ program.
class SortingFilter < BufferingFilter
  include ExternalPrograms

  # tempfile - the buffered, unsorted input.
  #
  # Returns the reopened temporary file holding the sorted output.
  def filter_all(tempfile)
    # Both files are closed before +sort+ runs; it works on their paths.
    sorted_output = Tempfile.new("sort")
    sorted_output.close
    tempfile.close

    sort(tempfile, sorted_output)
    sorted_output.open
  end
end
|
108
|
+
|
109
|
+
# Rebuilds Record objects from their formatted text form: a header line of the
# shape "Record <number> (<type name>)", followed by one "... [value]..." line
# per content field and one separator line.
class ParsingFilter < Filter
  # record_types - object responding to find_by_name(name).
  def initialize(record_types)
    @record_types = record_types
  end

  # record - the header line of a formatted record. The value lines and the
  #          separator line are pulled from upstream.
  #
  # Returns an AsciiDataTools::Record::Record.
  # Raises RuntimeError if the header or a value line cannot be parsed.
  def filter(record)
    header_line = record
    record_type = identify_record_type_from(header_line)
    values = parse_values_from_subsequent_lines(record_type)

    consume_empty_line_between_records

    AsciiDataTools::Record::Record.new(record_type, values)
  end

  protected

  # Extracts the type name from the parentheses of the header line and looks
  # it up in the configured record types.
  def identify_record_type_from(header_line)
    match = header_line.match(/Record \d+ \((.*?)\)/)
    raise "Cannot find record type in line #{header_line}!" unless match
    expected_record_type_name = match[1]
    @record_types.find_by_name(expected_record_type_name)
  end

  # Reads one upstream line per content field of +record_type+ and returns the
  # parsed values in order.
  def parse_values_from_subsequent_lines(record_type)
    values = []
    record_type.number_of_content_fields.times { values << parse_value_from(upstream.read) }
    values
  end

  # Returns the text between the first "[" and the LAST "]" of +line+, with
  # the two-character sequence backslash-n turned back into a newline.
  #
  # The capture is greedy so that a value which itself contains "]" is kept
  # whole rather than being cut off at its first "]".
  def parse_value_from(line)
    match = line && line.match(/\[(.*)\]/)
    raise "Cannot find field value in line #{line}!" unless match
    match[1].gsub("\\n", "\n")
  end

  # Reads and discards the line that follows the value lines of a record.
  def consume_empty_line_between_records
    upstream.read
  end
end
|
147
|
+
end
|
148
|
+
end
|
@@ -0,0 +1,139 @@
|
|
1
|
+
require 'ascii-data-tools/external_programs'
|
2
|
+
|
3
|
+
module AsciiDataTools
|
4
|
+
module Filter
|
5
|
+
module Diffing
|
6
|
+
# Formats the records contained in Difference objects and writes the left and
# right sides of each difference to two separate output streams.
class DiffFormattingFilter < FormattingFilter
  DIFF_DELIMITER = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n\n"

  # type_determiner - object responding to determine_type_for(encoded_record).
  def initialize(type_determiner)
    super(nil, type_determiner)
    # Make this filter's formatter extend record types with the unnumbered
    # variant of the formatting behaviour.
    def @formatter.formattable(record_type)
      # is_a? (not instance_of?) is the correct check for an extended module,
      # and the record type is returned explicitly: `extend ... unless`
      # evaluates to nil whenever the guard holds.
      unless record_type.is_a?(Formatting::UnnumberedFormatableType)
        record_type.extend(Formatting::UnnumberedFormatableType)
      end
      record_type
    end
  end

  # Drains upstream, which must yield objects responding to +left_contents+
  # and +right_contents+.
  #
  # streams - streams[0] receives the formatted left-hand records and
  #           streams[1] the formatted right-hand records. DIFF_DELIMITER is
  #           written to both between consecutive differences.
  def write(*streams)
    while upstream.has_records?
      difference = upstream.read
      difference.left_contents.each { |rec| streams[0] << filter(rec) }
      difference.right_contents.each { |rec| streams[1] << filter(rec) }
      if upstream.has_records?
        streams[0] << DIFF_DELIMITER
        streams[1] << DIFF_DELIMITER
      end
    end
  end
end
|
28
|
+
|
29
|
+
# Buffers each of its upstream streams into a temporary file, runs the
# external +diff+ on those files and serves the diff output as records.
class DiffExecutingFilter < BufferingFilter
  include ExternalPrograms

  # Raises StreamsEqualException (when the buffered input is first processed)
  # if the diff output is already at end-of-file.
  def initialize
    super do |tempfiles|
      diff_output = diff(tempfiles)
      raise StreamsEqualException if diff_output.eof?
      diff_output
    end
  end

  protected

  # Unlike the parent class, the connected upstream here is a collection of
  # streams; each of them is buffered into its own tempfile.
  def upstream
    return @upstream unless @first_time

    buffered = @upstream.collect { |stream| buffer_as_tempfile(stream) }
    @upstream = InputSource.new(nil, filter_all(buffered))
    @first_time = false
    @upstream
  end
end
|
49
|
+
|
50
|
+
# Raised when the compared streams turn out to be identical.
#
# Derives from StandardError (rather than directly from Exception) so that it
# behaves like an ordinary application error; code rescuing
# StreamsEqualException by name is unaffected.
class StreamsEqualException < StandardError
  def initialize
    super("The streams are equal")
  end
end
|
55
|
+
|
56
|
+
# Collects the lines that belong to one diff command. Subclasses supply
# +lines_to_consume+, the number of lines that follow the command line.
class Difference
  attr_reader :left_contents, :right_contents

  # left_range, right_range - the ranges parsed from the diff command line.
  def initialize(left_range, right_range)
    @left_range = left_range
    @right_range = right_range
    @left_contents = []
    @right_contents = []
    @consumed_lines = 0
  end

  # True once as many lines have been consumed as the subclass expects.
  def complete?
    lines_to_consume == @consumed_lines
  end

  # Takes one line of diff output. Lines starting with "<" are stored (minus
  # their two-character prefix) as left content, lines starting with ">" as
  # right content; every other line is only counted.
  def consume(line)
    destination =
      case line.to_s[0, 1]
      when "<" then @left_contents
      when ">" then @right_contents
      end
    destination << line[2..-1] if destination
    @consumed_lines += 1
  end

  protected

  # Number of elements in the left range.
  def left_range_length
    @left_range.to_a.size
  end

  # Number of elements in the right range.
  def right_range_length
    @right_range.to_a.size
  end
end
|
88
|
+
|
89
|
+
# Difference created for the "c" diff command.
class ConflictDifference < Difference
  protected

  # All left lines, all right lines, plus one additional line.
  def lines_to_consume
    1 + left_range_length + right_range_length
  end
end
|
95
|
+
|
96
|
+
# Difference created for the "a" diff command.
class AdditionDifference < Difference
  protected

  # Only the right-hand range contributes lines.
  def lines_to_consume
    right_range_length
  end
end
|
102
|
+
|
103
|
+
# Difference created for the "d" diff command.
class DeletionDifference < Difference
  protected

  # Only the left-hand range contributes lines.
  def lines_to_consume
    left_range_length
  end
end
|
109
|
+
|
110
|
+
# Turns the textual output of +diff+ into Difference objects. Each record read
# through this filter is a diff command line (e.g. "3c3", "5a6,7", "12,14d11");
# the content lines that belong to it are pulled from upstream.
class DiffParsingFilter < Filter
  def initialize
    super do |record|
      difference = make_difference_from(record)
      difference.consume(upstream.read) until difference.complete?
      difference
    end
  end

  protected

  # Builds the Difference subclass matching the command letter.
  #
  # Raises RuntimeError if +command_record+ is not a recognised diff command.
  def make_difference_from(command_record)
    left, command, right = split(command_record)
    case command
    when "c" then ConflictDifference.new(left, right)
    when "a" then AdditionDifference.new(left, right)
    when "d" then DeletionDifference.new(left, right)
    else raise "Cannot parse diff line: #{command_record}"
    end
  end

  # Splits a diff command line into [left_range, command, right_range].
  #
  # Line numbers may have any number of digits (\d+); matching a single digit
  # only would reject every difference beyond line 9. For an unparseable line
  # the command is nil and both ranges are 0..0.
  def split(command_record)
    captures = command_record.scan(/^(\d+)(?:,(\d+))?(c|a|d)(\d+)(?:,(\d+))?\Z/).first || []
    left_start, left_end, command, right_start, right_end = captures
    left_range = left_start.to_i..(left_end || left_start).to_i
    right_range = right_start.to_i..(right_end || right_start).to_i
    [left_range, command, right_range]
  end
end
|
137
|
+
end
|
138
|
+
end
|
139
|
+
end
|
@@ -0,0 +1,109 @@
|
|
1
|
+
module AsciiDataTools
|
2
|
+
module Formatting
|
3
|
+
# Formats decoded records, numbering them consecutively starting at 1.
class Formatter
  def initialize
    @record_counter = 0
  end

  # record - object responding to +type+ and +values+.
  #
  # Returns whatever the (formattable) record type's +format+ returns for the
  # next record number and the record's values.
  def format(record)
    formattable(record.type).format(next_record_counter_and_increment, record.values)
  end
  alias :transform :format

  protected

  # Increments the counter and returns the new value.
  def next_record_counter_and_increment
    @record_counter += 1
  end

  # Ensures +record_type+ has the FormatableType behaviour and returns it.
  #
  # is_a? is used because instance_of? is never true for a module, and the
  # record type is returned explicitly because `extend ... unless` evaluates
  # to nil when the guard holds.
  def formattable(record_type)
    record_type.extend(FormatableType) unless record_type.is_a?(FormatableType)
    record_type
  end
end
|
22
|
+
|
23
|
+
# Mixed into a record type to render records of that type as text. The host
# object must provide +name+, +fields+ and +length_of_longest_field_name+.
module FormatableType
  # record_number - number substituted into the header line.
  # values        - one value per field, in field order.
  #
  # Returns the formatted record as a String.
  def format(record_number, values)
    template % ([record_number] + escape_last(values))
  end

  protected

  # The format string for a whole record, built once and memoised: header
  # line, one line per field, then the footer, joined by newlines.
  def template
    @template ||= [header_template, field_templates, footer].flatten.join("\n")
  end

  def header_template
    "Record %02d (#{name})"
  end

  # One format-string line per field, with positions starting at 1.
  def field_templates
    make_fields_formatable_if_necessary
    last_index = fields.length - 1
    # each_with_index.map replaces enum_with_index, which no longer exists in
    # Ruby 1.9 and later.
    fields.each_with_index.map do |field, index|
      field.template(:position => index + 1,
                     :longest_field_name => length_of_longest_field_name,
                     :longest_field_length => longest_field_length,
                     :last_field? => (index == last_index))
    end
  end

  def make_fields_formatable_if_necessary
    fields.each { |field| field.extend(FormatableField) unless field.is_a?(FormatableField) }
  end

  # The largest field length, or 0 when any field does not respond to
  # +length+ or when there are no fields at all.
  def longest_field_length
    return 0 unless fields.all? { |field| field.respond_to?(:length) }
    fields.map { |field| field.length }.max || 0
  end

  # Returns a copy of +values+ in which newlines of the last value are
  # replaced by the two characters backslash and n. An empty list is returned
  # as an empty copy.
  def escape_last(values)
    new_values = values.dup
    new_values[-1] = values[-1].gsub("\n", "\\n") unless values.empty?
    new_values
  end

  def footer
    "\n"
  end
end
|
69
|
+
|
70
|
+
# Variant of FormatableType whose header line carries no record number.
module UnnumberedFormatableType
  include FormatableType

  # record_number is accepted for interface compatibility but not used.
  def format(record_number, values)
    escaped_values = escape_last(values)
    template % escaped_values
  end

  protected

  def header_template
    "Record (#{name})"
  end
end
|
82
|
+
|
83
|
+
# Mixed into a field to produce the format-string line for that field. The
# host object must provide +name+ and may provide +length+.
module FormatableField
  PAD_CHARACTER = "-"
  MINIMUM_NUMBER_OF_DASHES = 5

  # properties - Hash with the keys :position, :longest_field_name,
  #              :longest_field_length and :last_field?.
  #
  # Returns a format string of the shape "NN NAME : [%s]----" whose single
  # remaining %s is the placeholder for the field value.
  def template(properties)
    "%02d %s : [%s]%s" % [properties[:position],
                          padded_field_name(name, properties[:longest_field_name]),
                          "%s",
                          padding(properties)]
  end

  protected

  # Left-justifies the field name to the width of the longest field name.
  def padded_field_name(field_name, length_of_longest_field_name)
    field_name.ljust(length_of_longest_field_name)
  end

  def padding(properties)
    PAD_CHARACTER * number_of_pad_chars(properties)
  end

  # Number of pad characters after the closing bracket. A field without
  # +length+ gets the minimum; otherwise shorter fields get more padding and
  # the last field gets one character less.
  #
  # The result is clamped at zero: :longest_field_length can be smaller than
  # this field's own length, and multiplying a String by a negative count
  # raises ArgumentError.
  def number_of_pad_chars(properties)
    return MINIMUM_NUMBER_OF_DASHES unless respond_to?(:length)

    pad_chars = MINIMUM_NUMBER_OF_DASHES + properties[:longest_field_length] - length
    pad_chars -= 1 if properties[:last_field?]
    [pad_chars, 0].max
  end
end
|
108
|
+
end
|
109
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module AsciiDataTools
  class << self
    # Evaluates +block+ in the context of the shared record type repository.
    def configure(&block)
      record_types.instance_eval(&block)
    end

    # The lazily created, process-wide record type repository.
    def record_types
      @record_types ||= RecordType::RecordTypeRepository.new
    end

    # Loads every 'ascii-data-tools/discover.rb' file found through RubyGems,
    # preferring the files shipped with the newest version of each gem.
    #
    # Returns the list of loaded paths.
    def autodiscover
      require 'rubygems'

      discovery_files_from_newest_gem_versions.each { |f| load f }
    end

    private

    # Selects the discovery files to load.
    #
    # Gem.latest_load_paths is not available in current RubyGems releases, so
    # it is only used when present; otherwise Gem.find_latest_files (when
    # available) or, as a last resort, the unfiltered Gem.find_files result
    # is used.
    def discovery_files_from_newest_gem_versions
      glob = 'ascii-data-tools/discover.rb'

      if Gem.respond_to?(:latest_load_paths)
        Gem.find_files(glob).select do |path|
          Gem.latest_load_paths.any? { |load_path| path.include?(load_path) } ||
            !path.include?(Gem.default_dir)
        end
      elsif Gem.respond_to?(:find_latest_files)
        Gem.find_latest_files(glob)
      else
        Gem.find_files(glob)
      end
    end
  end
end
|