ascii-data-tools 0.9
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +3 -0
- data/.rvmrc +1 -0
- data/.travis.yml +4 -0
- data/Gemfile +3 -0
- data/Gemfile.lock +40 -0
- data/LICENSE.GPL2 +339 -0
- data/README.rdoc +52 -0
- data/Rakefile +42 -0
- data/TODO +4 -0
- data/ascii-data-tools.gemspec +30 -0
- data/bin/ascii-data-cat +13 -0
- data/bin/ascii-data-edit +13 -0
- data/bin/ascii-data-norm +13 -0
- data/bin/ascii-data-qdiff +13 -0
- data/bin/ascii-data-tools-config +9 -0
- data/examples/big +10000 -0
- data/examples/built_in_records.gz +0 -0
- data/examples/slightly_modified_built_in_records.gz +0 -0
- data/features/ascii-data-cat.feature +110 -0
- data/features/ascii-data-edit.feature +91 -0
- data/features/ascii-data-qdiff.feature +54 -0
- data/features/encoding_decoding.feature +68 -0
- data/features/normaliser.feature +27 -0
- data/features/plugins.feature +73 -0
- data/features/record_recognition.feature +61 -0
- data/features/step_definitions/ascii-data-cat_steps.rb +48 -0
- data/features/step_definitions/ascii-data-edit_steps.rb +38 -0
- data/features/step_definitions/ascii-data-norm_steps.rb +7 -0
- data/features/step_definitions/ascii-data-qdiff_steps.rb +43 -0
- data/features/step_definitions/encoding_decoding_steps.rb +23 -0
- data/features/step_definitions/plugins_steps.rb +11 -0
- data/features/step_definitions/record_recognition_steps.rb +10 -0
- data/features/support/env.rb +5 -0
- data/lib/ascii-data-tools.rb +8 -0
- data/lib/ascii-data-tools/configuration.rb +169 -0
- data/lib/ascii-data-tools/configuration_printer.rb +38 -0
- data/lib/ascii-data-tools/controller.rb +123 -0
- data/lib/ascii-data-tools/discover.rb +19 -0
- data/lib/ascii-data-tools/external_programs.rb +23 -0
- data/lib/ascii-data-tools/filter.rb +148 -0
- data/lib/ascii-data-tools/filter/diffing.rb +139 -0
- data/lib/ascii-data-tools/formatting.rb +109 -0
- data/lib/ascii-data-tools/global_autodiscovery.rb +21 -0
- data/lib/ascii-data-tools/record.rb +50 -0
- data/lib/ascii-data-tools/record_type.rb +139 -0
- data/lib/ascii-data-tools/record_type/builder.rb +50 -0
- data/lib/ascii-data-tools/record_type/decoder.rb +77 -0
- data/lib/ascii-data-tools/record_type/encoder.rb +17 -0
- data/lib/ascii-data-tools/record_type/field.rb +168 -0
- data/lib/ascii-data-tools/record_type/normaliser.rb +38 -0
- data/lib/ascii-data-tools/ruby_extensions.rb +7 -0
- data/lib/ascii-data-tools/version.rb +3 -0
- data/spec/ascii-data-tools/configuration_printer_spec.rb +51 -0
- data/spec/ascii-data-tools/configuration_spec.rb +153 -0
- data/spec/ascii-data-tools/discover_spec.rb +8 -0
- data/spec/ascii-data-tools/filter/diffing_spec.rb +82 -0
- data/spec/ascii-data-tools/filter_spec.rb +107 -0
- data/spec/ascii-data-tools/formatting_spec.rb +106 -0
- data/spec/ascii-data-tools/record_spec.rb +49 -0
- data/spec/ascii-data-tools/record_type/builder_spec.rb +69 -0
- data/spec/ascii-data-tools/record_type/decoder_spec.rb +73 -0
- data/spec/ascii-data-tools/record_type/encoder_spec.rb +32 -0
- data/spec/ascii-data-tools/record_type/field_spec.rb +160 -0
- data/spec/ascii-data-tools/record_type/normaliser_spec.rb +25 -0
- data/spec/ascii-data-tools/record_type_spec.rb +175 -0
- data/spec/filter_helper.rb +24 -0
- data/spec/record_type_helpers.rb +8 -0
- data/spec/spec.opts +2 -0
- data/spec/spec_helper.rb +5 -0
- metadata +196 -0
@@ -0,0 +1,19 @@
|
|
1
|
+
# Registers two example record types through the AsciiDataTools
# configuration DSL. Every field is declared with a fixed :length; the
# RECORD_TYPE field of each type is additionally :constrained_to the type's
# own name.
AsciiDataTools.configure do
  record_type "EXAMPLE01" do
    field("RECORD_TYPE",      :length => 9, :constrained_to => "EXAMPLE01")
    field("USAGE",            :length => 3)
    field("A_NUMBER",         :length => 16)
    field("B_NUMBER",         :length => 16)
    field("CHARGEABLE_UNITS", :length => 4)
    field("END_OF_RECORD",    :length => 1)
  end

  record_type "EXAMPLE02" do
    field("RECORD_TYPE",      :length => 9, :constrained_to => "EXAMPLE02")
    field("APN",              :length => 12)
    # TIMESTAMP is the only field flagged :normalised.
    field("TIMESTAMP",        :length => 14, :normalised => true)
    field("SESSION_DURATION", :length => 4)
    field("CHARGEABLE_UNITS", :length => 4)
    field("END_OF_RECORD",    :length => 1)
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'shellwords'

module AsciiDataTools
  # Wrappers around the external command-line programs (touch, diff, sort,
  # vimdiff) used by the tools. Commands are still run through the shell, but
  # every path is shell-escaped first so that names containing spaces or shell
  # metacharacters are passed through literally. Paths made only of ordinary
  # characters produce exactly the same command string as before.
  module ExternalPrograms
    # Sets both the access and modification time of the file at self.path.
    #
    # new_mtime - an object responding to strftime (e.g. a Time).
    #
    # Returns whatever Kernel.system returns for the touch command.
    def modify_file_mtime_to(new_mtime)
      timestamp = new_mtime.strftime('%Y%m%d%H%M.%S')
      Kernel.system("touch -am -t #{timestamp} #{Shellwords.escape(self.path)}")
    end

    # Starts diff on the given files and returns the IO connected to its output.
    #
    # files - a collection of objects responding to #path.
    def diff(files)
      IO.popen(diff_command_for(files))
    end

    # Builds the diff command line for the given files.
    #
    # files - a collection of objects responding to #path.
    #
    # Returns the command as a String.
    def diff_command_for(files)
      "diff " + Shellwords.join(files.collect(&:path))
    end

    # Sorts the contents of input_file into output_file using sort(1).
    # Both arguments must respond to #path.
    def sort(input_file, output_file)
      Kernel.system("sort #{Shellwords.escape(input_file.path)} > #{Shellwords.escape(output_file.path)}")
    end

    # Opens vimdiff on the given filenames (an Array of path strings).
    def edit_differences(filenames)
      Kernel.system("vimdiff #{Shellwords.join(filenames)}")
    end
  end
end
|
@@ -0,0 +1,148 @@
|
|
1
|
+
require 'tempfile'
|
2
|
+
require 'ascii-data-tools/external_programs'
|
3
|
+
|
4
|
+
module AsciiDataTools
|
5
|
+
module Filter
|
6
|
+
# One stage of a filter pipeline. A stage pulls records from the stage
# attached to it with #<< and passes each one through the block given at
# construction time.
class Filter
  # block - called with each upstream record; its result is what #read returns.
  def initialize(&block)
    @filter_block = block
  end

  # Attaches upstream_filter as this stage's source.
  # Returns self so that stages can be chained.
  def <<(upstream_filter)
    @upstream = upstream_filter
    self
  end

  # Reads one record from upstream and returns it after filtering.
  def read
    raw_record = upstream.read
    filter(raw_record)
  end

  # Appends filtered records to output_stream (via <<) until upstream
  # reports that it has no more records.
  def write(output_stream)
    while has_records?
      output_stream << read
    end
  end

  # True while upstream still has records to deliver.
  def has_records?
    upstream.has_records?
  end

  protected

  # Applies the construction-time block to a single record.
  def filter(record)
    @filter_block.call(record)
  end

  # The source attached with #<<.
  attr_reader :upstream
end
|
37
|
+
|
38
|
+
# A filter that, on first use, drains its whole upstream into a temporary
# file, hands that file to the block given at construction time and then
# reads from whatever stream the block returns (wrapped in an InputSource).
# Individual records pass through unchanged.
class BufferingFilter < Filter
  # block - called once with the buffered tempfile; must return the stream
  #         that subsequent reads are served from.
  def initialize(&block)
    @first_time = true
    @filter_all_block = block
  end

  protected

  # Transforms the complete buffered input; returns the replacement stream.
  def filter_all(tempfile)
    @filter_all_block[tempfile]
  end

  # Records are not altered one by one.
  def filter(record)
    record
  end

  # Lazily swaps the original upstream for the processed, buffered one.
  def upstream
    if @first_time
      tempfile = buffer_as_tempfile(@upstream)
      @upstream = InputSource.new(nil, filter_all(tempfile))
      @first_time = false
    end
    @upstream
  end

  # Copies every record of stream into a new Tempfile and reopens it so it is
  # flushed and positioned at the start.
  #
  # Returns the Tempfile itself rather than the File returned by
  # Tempfile#open: once the Tempfile object becomes unreachable its finalizer
  # may close and unlink the underlying file, so callers must hold on to it.
  def buffer_as_tempfile(stream)
    tempfile = Tempfile.new("filter")
    tempfile << stream.read while stream.has_records?
    tempfile.open
    tempfile
  end
end
|
68
|
+
|
69
|
+
# Turns raw ASCII records into their formatted, human-readable text.
class FormattingFilter < Filter
  # filename        - name passed along to the type determiner with each record.
  # type_determiner - object responding to determine_type_for(encoded_record).
  def initialize(filename, type_determiner)
    @formatter = Formatting::Formatter.new
    @filename, @type_determiner = filename, type_determiner
  end

  # Determines the record's type, decodes it and returns the formatted result.
  def filter(record)
    encoded = {:ascii_string => record, :filename => @filename}
    record_type = @type_determiner.determine_type_for(encoded)
    @formatter.format(record_type.decode(encoded))
  end
end
|
83
|
+
|
84
|
+
# Passes each record through the normalise method of its record type.
class NormalisingFilter < Filter
  # filename        - name passed along to the type determiner with each record.
  # type_determiner - object responding to determine_type_for.
  def initialize(filename, type_determiner)
    @filename, @type_determiner = filename, type_determiner
  end

  # Looks up the type of the raw record and returns type.normalise(record).
  def filter(record)
    @type_determiner.
      determine_type_for(:ascii_string => record, :filename => @filename).
      normalise(record)
  end
end
|
95
|
+
|
96
|
+
# Buffers the whole input and serves it back sorted by the external sort
# program.
class SortingFilter < BufferingFilter
  include ExternalPrograms

  # Sorts the buffered input into a second temporary file.
  #
  # tempfile - the buffered input; it is closed before sort reads it by path.
  #
  # Returns the reopened Tempfile holding the sorted records. The Tempfile
  # itself is returned (not the File that Tempfile#open yields) so the file is
  # not closed or unlinked by the Tempfile finalizer while still being read.
  def filter_all(tempfile)
    tempfile.close

    sorted_tempfile = Tempfile.new("sort")
    # Closed so the external program can write to the path.
    sorted_tempfile.close

    sort(tempfile, sorted_tempfile)
    sorted_tempfile.open
    sorted_tempfile
  end
end
|
108
|
+
|
109
|
+
# Rebuilds records from their formatted text. #filter receives a header line
# of the form "Record <number> (<type name>)" and then pulls one line per
# content field, plus one separator line, directly from upstream.
class ParsingFilter < Filter
  # record_types - repository responding to find_by_name(name).
  def initialize(record_types)
    @record_types = record_types
  end

  # record - the header line of a formatted record.
  #
  # Returns an AsciiDataTools::Record::Record built from the parsed values.
  # Raises RuntimeError when the header or a field line cannot be parsed.
  def filter(record)
    header_line = record
    record_type = identify_record_type_from(header_line)
    values = parse_values_from_subsequent_lines(record_type)

    consume_empty_line_between_records

    AsciiDataTools::Record::Record.new(record_type, values)
  end

  protected

  # Extracts the type name from the header and looks it up in the repository.
  def identify_record_type_from(header_line)
    match = header_line.match(/Record \d+ \((.*?)\)/)
    raise "Cannot find record type in line #{header_line}!" unless match
    expected_record_type_name = match[1]
    @record_types.find_by_name(expected_record_type_name)
  end

  # Reads one upstream line per content field and collects the parsed values.
  def parse_values_from_subsequent_lines(record_type)
    values = []
    record_type.number_of_content_fields.times { values << parse_value_from(upstream.read) }
    values
  end

  # Returns the text between the first "[" and the last "]" of the line, with
  # escaped newlines ("\\n") turned back into real ones.
  #
  # The capture is greedy so that values which themselves contain "]" are not
  # cut short. A missing or malformed line raises a descriptive error, in
  # line with the header parsing above, instead of failing on nil.
  def parse_value_from(line)
    match = line && line.match(/.*?\[(.*)\]/)
    raise "Cannot find field value in line #{line.inspect}!" unless match
    match[1].gsub("\\n", "\n")
  end

  # Skips the separator line that follows each formatted record.
  def consume_empty_line_between_records
    upstream.read
  end
end
|
147
|
+
end
|
148
|
+
end
|
@@ -0,0 +1,139 @@
|
|
1
|
+
require 'ascii-data-tools/external_programs'
|
2
|
+
|
3
|
+
module AsciiDataTools
|
4
|
+
module Filter
|
5
|
+
module Diffing
|
6
|
+
# Formats the two sides of each difference read from upstream and writes
# them to two separate output streams, using unnumbered record headers.
class DiffFormattingFilter < FormattingFilter
  # Written to both streams between consecutive differences.
  DIFF_DELIMITER = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n\n"

  # type_determiner - object responding to determine_type_for.
  def initialize(type_determiner)
    super(nil, type_determiner)
    # Override the formatter's type decoration for this instance only.
    # is_a? is the check that can actually detect an already-extended type
    # (instance_of? with a module is never true), and the type is returned
    # explicitly so the result does not depend on the guard.
    def @formatter.formattable(record_type)
      record_type.extend(Formatting::UnnumberedFormatableType) unless record_type.is_a?(Formatting::UnnumberedFormatableType)
      record_type
    end
  end

  # streams - two output streams: streams[0] receives the formatted left-hand
  #           records of every difference, streams[1] the right-hand ones.
  def write(*streams)
    while upstream.has_records?
      difference = upstream.read
      difference.left_contents.each {|rec| streams[0] << filter(rec)}
      difference.right_contents.each {|rec| streams[1] << filter(rec)}
      # Only separate differences; no delimiter after the last one.
      if upstream.has_records?
        streams[0] << DIFF_DELIMITER
        streams[1] << DIFF_DELIMITER
      end
    end
  end
end
|
28
|
+
|
29
|
+
# Buffers each of its upstream streams into a temporary file, runs diff over
# those files and serves the diff output downstream.
class DiffExecutingFilter < BufferingFilter
  include ExternalPrograms

  # Raises StreamsEqualException (on first read) when diff produces no output.
  def initialize
    super do |tempfiles|
      diff_output = diff(tempfiles)
      raise StreamsEqualException.new if diff_output.eof?
      diff_output
    end
  end

  protected

  # Here @upstream starts out as a collection of streams; on first access it
  # is replaced by an InputSource over the diff output.
  def upstream
    return @upstream unless @first_time

    buffered_files = @upstream.collect {|stream| buffer_as_tempfile(stream)}
    @upstream = InputSource.new(nil, filter_all(buffered_files))
    @first_time = false
    @upstream
  end
end
|
49
|
+
|
50
|
+
# Signals that the compared streams produced no differences.
# Note: this derives directly from Exception, so a bare `rescue` (which only
# covers StandardError) does not catch it.
class StreamsEqualException < Exception
  # Takes no arguments; the message is fixed.
  def initialize
    super "The streams are equal"
  end
end
|
55
|
+
|
56
|
+
# One hunk of diff output. It is created from the line ranges of a diff
# command line and then fed the following diff lines one at a time until
# #complete? reports that the expected number of lines has been consumed.
# Subclasses define how many lines that is (lines_to_consume).
class Difference
  attr_reader :left_contents, :right_contents

  # left_range, right_range - the line ranges named by the diff command.
  def initialize(left_range, right_range)
    @left_range = left_range
    @right_range = right_range
    @left_contents = []
    @right_contents = []
    @consumed_lines = 0
  end

  # True once exactly lines_to_consume lines have been consumed.
  def complete?
    lines_to_consume == @consumed_lines
  end

  # Consumes one diff line. Lines starting with "<" or ">" are stored,
  # without their two-character prefix, in the left or right contents;
  # any other line is only counted.
  #
  # Returns the number of lines consumed so far.
  def consume(line)
    case line.to_s[0, 1]
    when "<" then @left_contents << line[2..-1]
    when ">" then @right_contents << line[2..-1]
    end
    @consumed_lines += 1
  end

  protected

  # Number of lines covered by the left-hand range.
  def left_range_length
    @left_range.count
  end

  # Number of lines covered by the right-hand range.
  def right_range_length
    @right_range.count
  end
end

# A "c" (change) hunk: left lines, one separator line, then right lines.
class ConflictDifference < Difference
  protected

  def lines_to_consume
    1 + left_range_length + right_range_length
  end
end

# An "a" (add) hunk: only right-hand lines follow the command.
class AdditionDifference < Difference
  protected

  def lines_to_consume
    right_range_length
  end
end

# A "d" (delete) hunk: only left-hand lines follow the command.
class DeletionDifference < Difference
  protected

  def lines_to_consume
    left_range_length
  end
end
|
109
|
+
|
110
|
+
class DiffParsingFilter < Filter
|
111
|
+
# Installs the per-record block: each upstream record is treated as a diff
# command line, turned into a difference object, and that object is then fed
# further upstream lines until it reports completion.
def initialize
  super do |command_record|
    make_difference_from(command_record).tap do |difference|
      difference.consume(upstream.read) until difference.complete?
    end
  end
end
|
118
|
+
|
119
|
+
protected
|
120
|
+
# Builds the difference object matching a diff command line.
#
# command_record - a diff command line such as "1,2c1,2".
#
# Returns a ConflictDifference ("c"), AdditionDifference ("a") or
# DeletionDifference ("d"); raises RuntimeError for any other command.
def make_difference_from(command_record)
  left, command, right = split(command_record)
  difference_class = {
    "c" => ConflictDifference,
    "a" => AdditionDifference,
    "d" => DeletionDifference
  }[command]
  raise "Cannot parse diff line: #{command_record}" unless difference_class
  difference_class.new(left, right)
end
|
129
|
+
|
130
|
+
# Splits a diff command line such as "3c3", "12,15a20" or "100,120d99" into
# its parts.
#
# Line numbers may have any number of digits (the previous single-digit
# pattern rejected every command that mentioned line 10 or above).
#
# Returns [left_range, command, right_range]. When the line is not a diff
# command the command element is nil and both ranges are 0..0, which the
# caller reports as an unparseable line.
def split(command_record)
  match = command_record.match(/^(\d+)(?:,(\d+))?(c|a|d)(\d+)(?:,(\d+))?\Z/)
  left_start, left_end, command, right_start, right_end = match ? match.captures : []
  # A missing range end means a single-line range.
  left_range = left_start.to_i..(left_end || left_start).to_i
  right_range = right_start.to_i..(right_end || right_start).to_i
  [left_range, command, right_range]
end
|
136
|
+
end
|
137
|
+
end
|
138
|
+
end
|
139
|
+
end
|
@@ -0,0 +1,109 @@
|
|
1
|
+
module AsciiDataTools
|
2
|
+
module Formatting
|
3
|
+
# Formats records one after another, numbering them consecutively from 1.
class Formatter
  def initialize
    @record_counter = 0
  end

  # record - object responding to #type and #values.
  #
  # Returns the result of the record type's own format method for the next
  # record number and the record's values.
  def format(record)
    formattable(record.type).format(next_record_counter_and_increment, record.values)
  end
  alias :transform :format

  protected

  # Increments the counter and returns the new value.
  def next_record_counter_and_increment
    @record_counter += 1
  end

  # Makes sure record_type can format records and returns it.
  #
  # is_a? is used because instance_of? can never be true for a module, which
  # made the old guard dead code; the type is returned explicitly so that an
  # already-extended type is still handed back to the caller.
  def formattable(record_type)
    record_type.extend(FormatableType) unless record_type.is_a?(FormatableType)
    record_type
  end
end
|
22
|
+
|
23
|
+
# Mixed into a record type to render records as text: a numbered header
# line, one line per field and a trailing blank line. The host is expected
# to provide #name, #fields and #length_of_longest_field_name.
module FormatableType
  # record_number - number shown in the header line.
  # values        - field values in field order (Strings).
  #
  # Returns the formatted record as a String.
  def format(record_number, values)
    template % ([record_number] + escape_last(values))
  end

  protected

  # The memoized format string for this type.
  def template
    @template ||= [header_template, field_templates, footer].flatten.join("\n")
  end

  def header_template
    "Record %02d (#{name})"
  end

  # One format string per field, each still containing a "%s" for the value.
  # each_with_index is used because enum_with_index no longer exists on
  # Ruby 1.9 and later.
  def field_templates
    make_fields_formatable_if_necessary
    fields.each_with_index.map do |field, index|
      field.template(:position => index + 1,
                     :longest_field_name => length_of_longest_field_name,
                     :longest_field_length => longest_field_length,
                     :last_field? => (index + 1 == fields.length))
    end
  end

  def make_fields_formatable_if_necessary
    fields.each {|field| field.extend(FormatableField) unless field.is_a?(FormatableField) }
  end

  # The largest field length, or 0 when there are no fields or when any
  # field does not respond to #length.
  def longest_field_length
    return 0 unless fields.all? {|field| field.respond_to?(:length) }
    fields.map {|field| field.length }.max || 0
  end

  # Returns a copy of values in which newlines of the last value are written
  # as a literal "\n", keeping the record on one line per field. An empty
  # list is returned unchanged.
  def escape_last(values)
    new_values = values.clone
    new_values[-1] = values[-1].gsub("\n", "\\n") unless values.empty?
    new_values
  end

  def footer
    "\n"
  end
end
|
69
|
+
|
70
|
+
# Variant of FormatableType whose header carries no record number.
module UnnumberedFormatableType
  include FormatableType

  # record_number is accepted for interface compatibility but not used,
  # because the header template has no placeholder for it.
  def format(record_number, values)
    layout = template
    layout % escape_last(values)
  end

  protected

  def header_template
    "Record (#{name})"
  end
end
|
82
|
+
|
83
|
+
# Mixed into a field to build the format string for its line in a formatted
# record. The host is expected to provide #name and, optionally, #length.
module FormatableField
  PAD_CHARACTER = "-"
  MINIMUM_NUMBER_OF_DASHES = 5

  # properties - Hash with :position, :longest_field_name,
  #              :longest_field_length and :last_field?.
  #
  # Returns a format string that still contains one "%s" for the value.
  def template(properties)
    position = properties[:position]
    label = padded_field_name(name, properties[:longest_field_name])
    "%02d %s : [%s]%s" % [position, label, "%s", padding(properties)]
  end

  protected

  # Left-justifies the field name to the width of the longest name.
  def padded_field_name(field_name, length_of_longest_field_name)
    field_name.ljust(length_of_longest_field_name)
  end

  # The run of pad characters that follows the closing bracket.
  def padding(properties)
    PAD_CHARACTER * number_of_pad_chars(properties)
  end

  # Fields without a length get the minimum padding. Otherwise the padding
  # grows with the gap to the longest field, minus one for the last field.
  def number_of_pad_chars(properties)
    if respond_to?(:length)
      last_field_adjustment = properties[:last_field?] ? 1 : 0
      MINIMUM_NUMBER_OF_DASHES + properties[:longest_field_length] - length - last_field_adjustment
    else
      MINIMUM_NUMBER_OF_DASHES
    end
  end
end
|
108
|
+
end
|
109
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module AsciiDataTools
  # Evaluates the block in the context of the shared record type repository,
  # so the repository's methods can be called directly inside the block.
  def self.configure(&block)
    record_types.instance_eval(&block)
  end

  # The shared record type repository, created on first use.
  def self.record_types
    @record_types = RecordType::RecordTypeRepository.new unless @record_types
    @record_types
  end

  # Loads every 'ascii-data-tools/discover.rb' that RubyGems can find, keeping
  # only files that lie under one of Gem.latest_load_paths or outside
  # Gem.default_dir.
  def self.autodiscover
    require 'rubygems'

    candidates = Gem.find_files('ascii-data-tools/discover.rb')
    configuration_files = candidates.select do |path|
      Gem.latest_load_paths.any? {|load_path| path.include?(load_path) } || !path.include?(Gem.default_dir)
    end

    configuration_files.each {|configuration_file| load configuration_file }
  end
end
|