ascii-data-tools 0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. data/.gitignore +3 -0
  2. data/.rvmrc +1 -0
  3. data/.travis.yml +4 -0
  4. data/Gemfile +3 -0
  5. data/Gemfile.lock +40 -0
  6. data/LICENSE.GPL2 +339 -0
  7. data/README.rdoc +52 -0
  8. data/Rakefile +42 -0
  9. data/TODO +4 -0
  10. data/ascii-data-tools.gemspec +30 -0
  11. data/bin/ascii-data-cat +13 -0
  12. data/bin/ascii-data-edit +13 -0
  13. data/bin/ascii-data-norm +13 -0
  14. data/bin/ascii-data-qdiff +13 -0
  15. data/bin/ascii-data-tools-config +9 -0
  16. data/examples/big +10000 -0
  17. data/examples/built_in_records.gz +0 -0
  18. data/examples/slightly_modified_built_in_records.gz +0 -0
  19. data/features/ascii-data-cat.feature +110 -0
  20. data/features/ascii-data-edit.feature +91 -0
  21. data/features/ascii-data-qdiff.feature +54 -0
  22. data/features/encoding_decoding.feature +68 -0
  23. data/features/normaliser.feature +27 -0
  24. data/features/plugins.feature +73 -0
  25. data/features/record_recognition.feature +61 -0
  26. data/features/step_definitions/ascii-data-cat_steps.rb +48 -0
  27. data/features/step_definitions/ascii-data-edit_steps.rb +38 -0
  28. data/features/step_definitions/ascii-data-norm_steps.rb +7 -0
  29. data/features/step_definitions/ascii-data-qdiff_steps.rb +43 -0
  30. data/features/step_definitions/encoding_decoding_steps.rb +23 -0
  31. data/features/step_definitions/plugins_steps.rb +11 -0
  32. data/features/step_definitions/record_recognition_steps.rb +10 -0
  33. data/features/support/env.rb +5 -0
  34. data/lib/ascii-data-tools.rb +8 -0
  35. data/lib/ascii-data-tools/configuration.rb +169 -0
  36. data/lib/ascii-data-tools/configuration_printer.rb +38 -0
  37. data/lib/ascii-data-tools/controller.rb +123 -0
  38. data/lib/ascii-data-tools/discover.rb +19 -0
  39. data/lib/ascii-data-tools/external_programs.rb +23 -0
  40. data/lib/ascii-data-tools/filter.rb +148 -0
  41. data/lib/ascii-data-tools/filter/diffing.rb +139 -0
  42. data/lib/ascii-data-tools/formatting.rb +109 -0
  43. data/lib/ascii-data-tools/global_autodiscovery.rb +21 -0
  44. data/lib/ascii-data-tools/record.rb +50 -0
  45. data/lib/ascii-data-tools/record_type.rb +139 -0
  46. data/lib/ascii-data-tools/record_type/builder.rb +50 -0
  47. data/lib/ascii-data-tools/record_type/decoder.rb +77 -0
  48. data/lib/ascii-data-tools/record_type/encoder.rb +17 -0
  49. data/lib/ascii-data-tools/record_type/field.rb +168 -0
  50. data/lib/ascii-data-tools/record_type/normaliser.rb +38 -0
  51. data/lib/ascii-data-tools/ruby_extensions.rb +7 -0
  52. data/lib/ascii-data-tools/version.rb +3 -0
  53. data/spec/ascii-data-tools/configuration_printer_spec.rb +51 -0
  54. data/spec/ascii-data-tools/configuration_spec.rb +153 -0
  55. data/spec/ascii-data-tools/discover_spec.rb +8 -0
  56. data/spec/ascii-data-tools/filter/diffing_spec.rb +82 -0
  57. data/spec/ascii-data-tools/filter_spec.rb +107 -0
  58. data/spec/ascii-data-tools/formatting_spec.rb +106 -0
  59. data/spec/ascii-data-tools/record_spec.rb +49 -0
  60. data/spec/ascii-data-tools/record_type/builder_spec.rb +69 -0
  61. data/spec/ascii-data-tools/record_type/decoder_spec.rb +73 -0
  62. data/spec/ascii-data-tools/record_type/encoder_spec.rb +32 -0
  63. data/spec/ascii-data-tools/record_type/field_spec.rb +160 -0
  64. data/spec/ascii-data-tools/record_type/normaliser_spec.rb +25 -0
  65. data/spec/ascii-data-tools/record_type_spec.rb +175 -0
  66. data/spec/filter_helper.rb +24 -0
  67. data/spec/record_type_helpers.rb +8 -0
  68. data/spec/spec.opts +2 -0
  69. data/spec/spec_helper.rb +5 -0
  70. metadata +196 -0
@@ -0,0 +1,19 @@
1
# Declares the two example record types, EXAMPLE01 and EXAMPLE02.
#
# Each record type is opened with +record_type+ and described by a sequence of
# +field+ declarations, every one of which carries a :length option. The
# leading RECORD_TYPE field of each type is additionally :constrained_to the
# type's own name, and EXAMPLE02's TIMESTAMP field is flagged :normalised.
# NOTE(review): what the repository does with these options is defined outside
# this file -- confirm against the record type builder before relying on it.
AsciiDataTools.configure do
  record_type("EXAMPLE01") do
    field("RECORD_TYPE",      :length => 9, :constrained_to => "EXAMPLE01")
    field("USAGE",            :length => 3)
    field("A_NUMBER",         :length => 16)
    field("B_NUMBER",         :length => 16)
    field("CHARGEABLE_UNITS", :length => 4)
    field("END_OF_RECORD",    :length => 1)
  end

  record_type("EXAMPLE02") do
    field("RECORD_TYPE",      :length => 9, :constrained_to => "EXAMPLE02")
    field("APN",              :length => 12)
    field("TIMESTAMP",        :length => 14, :normalised => true)
    field("SESSION_DURATION", :length => 4)
    field("CHARGEABLE_UNITS", :length => 4)
    field("END_OF_RECORD",    :length => 1)
  end
end
@@ -0,0 +1,23 @@
1
require 'shellwords'

module AsciiDataTools
  # Thin wrappers around the external command-line programs (touch, diff,
  # sort, vimdiff) that the tools shell out to.
  #
  # Every path is passed through Shellwords before being placed in a command
  # string, so paths containing whitespace or shell metacharacters are handed
  # to the program as a single literal argument instead of being re-parsed by
  # the shell. Paths made only of ordinary characters produce exactly the same
  # command strings as before.
  module ExternalPrograms
    # Sets both the access and modification time of the file at +self.path+.
    # The including object must therefore respond to +path+.
    #
    # new_mtime - object responding to +strftime+ (e.g. a Time).
    #
    # Returns whatever Kernel.system returns for the touch command.
    def modify_file_mtime_to(new_mtime)
      timestamp = new_mtime.strftime('%Y%m%d%H%M.%S')
      Kernel.system("touch -am -t #{timestamp} #{Shellwords.escape(self.path)}")
    end

    # Starts diff on the given files and returns the IO for reading its output.
    #
    # files - collection of objects responding to +path+.
    def diff(files)
      IO.popen(diff_command_for(files))
    end

    # Builds the shell command line used by #diff.
    #
    # files - collection of objects responding to +path+.
    #
    # Returns the command as a String with every path shell-escaped.
    def diff_command_for(files)
      "diff " + Shellwords.join(files.collect(&:path))
    end

    # Sorts the contents of input_file into output_file using sort(1).
    #
    # input_file, output_file - objects responding to +path+.
    #
    # Returns whatever Kernel.system returns for the sort command.
    def sort(input_file, output_file)
      source = Shellwords.escape(input_file.path)
      target = Shellwords.escape(output_file.path)
      # The redirection still needs a shell, hence the single-string form.
      Kernel.system("sort #{source} > #{target}")
    end

    # Opens the given files side by side in vimdiff.
    #
    # filenames - Array of path strings.
    #
    # Returns whatever Kernel.system returns for the vimdiff command.
    def edit_differences(filenames)
      Kernel.system("vimdiff #{Shellwords.join(filenames)}")
    end
  end
end
@@ -0,0 +1,148 @@
1
+ require 'tempfile'
2
+ require 'ascii-data-tools/external_programs'
3
+
4
+ module AsciiDataTools
5
+ module Filter
6
# One stage of a pull-style record chain.
#
# A filter is built with a block, attached to an upstream object with #<<,
# and then asked for records: each #read pulls one record from upstream and
# returns whatever the block makes of it. The upstream object only has to
# respond to +read+ and +has_records?+.
class Filter
  # block - called with each upstream record; its result is what #read returns.
  def initialize(&block)
    @filter_block = block
  end

  # Attaches the object this filter reads from.
  #
  # Returns self so that several filters can be chained in one expression.
  def <<(upstream_filter)
    @upstream = upstream_filter
    self
  end

  # Pulls the next record from upstream and returns it transformed.
  def read
    incoming = upstream.read
    filter(incoming)
  end

  # Drains the chain, appending every transformed record to output_stream.
  def write(output_stream)
    while has_records?
      output_stream << read
    end
  end

  # True while the upstream object reports more records.
  def has_records?
    upstream.has_records?
  end

  protected

  # The object attached with #<< (nil until something is attached).
  attr_reader :upstream

  # Applies the construction-time block to a single record.
  def filter(record)
    @filter_block.call(record)
  end
end
37
+
38
# A filter that needs to see the whole upstream before it can emit anything.
#
# On first use the complete upstream is written to a temporary file, that
# file is handed to the construction-time block (see #filter_all), and the
# block's result becomes the new upstream, wrapped in an InputSource.
# Individual records then pass through unchanged.
# NOTE(review): InputSource is defined outside this file; the first argument
# passed to it here is always nil -- confirm its meaning against that class.
class BufferingFilter < Filter
  # block - receives the reopened buffer file and returns the stream that
  #         subsequent reads should come from.
  def initialize(&block)
    @first_time = true
    @filter_all_block = block
  end

  protected

  # Runs the whole-stream transformation on the buffered file.
  def filter_all(tempfile)
    @filter_all_block.call(tempfile)
  end

  # Per-record step: records are passed through untouched.
  def filter(record)
    record
  end

  # Returns the stream to read from, buffering and transforming the real
  # upstream the first time it is asked for.
  def upstream
    return @upstream unless @first_time

    buffered = buffer_as_tempfile(@upstream)
    @upstream = InputSource.new(nil, filter_all(buffered))
    @first_time = false
    @upstream
  end

  # Copies every record of stream into a fresh temporary file.
  #
  # Returns the result of reopening that file (Tempfile#open).
  def buffer_as_tempfile(stream)
    buffer = Tempfile.new("filter")
    while stream.has_records?
      buffer << stream.read
    end
    buffer.open
  end
end
68
+
69
# Turns raw ASCII records into their formatted text representation.
#
# For each record the type determiner picks a record type, the type decodes
# the record, and a Formatting::Formatter renders the decoded result.
class FormattingFilter < Filter
  # filename        - name reported to the type determiner alongside each record.
  # type_determiner - object responding to +determine_type_for+.
  def initialize(filename, type_determiner)
    @formatter = Formatting::Formatter.new
    @type_determiner = type_determiner
    @filename = filename
  end

  # Decodes one raw record and returns it formatted.
  def filter(record)
    encoded = { :ascii_string => record, :filename => @filename }
    record_type = @type_determiner.determine_type_for(encoded)
    @formatter.format(record_type.decode(encoded))
  end
end
83
+
84
# Replaces each raw record with the normalised form produced by its type.
class NormalisingFilter < Filter
  # filename        - name reported to the type determiner alongside each record.
  # type_determiner - object responding to +determine_type_for+.
  def initialize(filename, type_determiner)
    @type_determiner = type_determiner
    @filename = filename
  end

  # Looks up the record's type and returns what that type's +normalise+
  # makes of the raw record.
  def filter(record)
    lookup = { :ascii_string => record, :filename => @filename }
    @type_determiner.determine_type_for(lookup).normalise(record)
  end
end
95
+
96
# Buffering filter whose whole-stream step sorts the buffered records with
# the external sort program (ExternalPrograms#sort).
class SortingFilter < BufferingFilter
  include ExternalPrograms

  # Sorts the buffered file into a second temporary file.
  #
  # tempfile - the buffer holding all upstream records.
  #
  # Returns the reopened file containing the sorted output.
  def filter_all(tempfile)
    # Both files are closed first; the external program works on their paths.
    tempfile.close
    sorted_output = Tempfile.new("sort").tap { |file| file.close }

    sort(tempfile, sorted_output)
    sorted_output.open
  end
end
108
+
109
# Rebuilds Record objects from formatted text.
#
# The record handed to #filter is a header line of the form
# "Record <number> (<type name>)". The filter then reads one further line per
# content field of that type from upstream, taking each value from between
# square brackets, and finally reads one more line (the blank separator
# between records).
class ParsingFilter < Filter
  # record_types - object responding to +find_by_name+.
  def initialize(record_types)
    @record_types = record_types
  end

  # header_line arrives as +record+; the field lines are pulled from upstream.
  #
  # Returns an AsciiDataTools::Record::Record built from the parsed values.
  # Raises RuntimeError when the header, the record type or a field line
  # cannot be understood.
  def filter(record)
    header_line = record
    record_type = identify_record_type_from(header_line)
    values = parse_values_from_subsequent_lines(record_type)

    consume_empty_line_between_records

    AsciiDataTools::Record::Record.new(record_type, values)
  end

  protected
  # Extracts the type name from the header line and looks it up.
  def identify_record_type_from(header_line)
    match = header_line.match(/Record \d+ \((.*?)\)/)
    raise "Cannot find record type in line #{header_line}!" unless match
    expected_record_type_name = match[1]
    record_type = @record_types.find_by_name(expected_record_type_name)
    # Fail with a readable message rather than a NoMethodError on nil later on.
    raise "Unknown record type #{expected_record_type_name} in line #{header_line}!" unless record_type
    record_type
  end

  # Reads one upstream line per content field and collects the values.
  def parse_values_from_subsequent_lines(record_type)
    values = []
    record_type.number_of_content_fields.times { values << parse_value_from(upstream.read) }
    values
  end

  # Returns the text between the first "[" and the LAST "]" of the line, with
  # escaped "\n" sequences turned back into real newlines.
  #
  # The capture is greedy on purpose: a value that itself contains "]" must
  # not be cut off at its first closing bracket.
  def parse_value_from(line)
    match = line && line.match(/.*?\[(.*)\]/)
    raise "Cannot find field value in line #{line}!" unless match
    match[1].gsub("\\n", "\n")
  end

  # Skips the separator line that follows each record.
  def consume_empty_line_between_records
    upstream.read
  end
end
147
+ end
148
+ end
@@ -0,0 +1,139 @@
1
+ require 'ascii-data-tools/external_programs'
2
+
3
+ module AsciiDataTools
4
+ module Filter
5
+ module Diffing
6
# Formats the two sides of each difference onto two separate output streams.
#
# Records are rendered with the unnumbered layout
# (Formatting::UnnumberedFormatableType), and consecutive differences are
# separated on both streams by DIFF_DELIMITER.
class DiffFormattingFilter < FormattingFilter
  DIFF_DELIMITER = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n\n"

  # type_determiner - object responding to +determine_type_for+; no filename
  #                   is passed on to the parent class.
  def initialize(type_determiner)
    super(nil, type_determiner)
    # Override the formatter's type decoration for this one instance only.
    def @formatter.formattable(record_type)
      return nil if record_type.instance_of?(Formatting::UnnumberedFormatableType)
      record_type.extend(Formatting::UnnumberedFormatableType)
    end
  end

  # Writes every upstream difference: left-hand records to streams[0],
  # right-hand records to streams[1]. A delimiter follows each difference
  # except the last one.
  def write(*streams)
    while upstream.has_records?
      difference = upstream.read
      difference.left_contents.each do |rec|
        streams[0] << filter(rec)
      end
      difference.right_contents.each do |rec|
        streams[1] << filter(rec)
      end

      next unless upstream.has_records?

      streams[0] << DIFF_DELIMITER
      streams[1] << DIFF_DELIMITER
    end
  end
end
28
+
29
# Buffering filter that takes a collection of upstream streams, buffers each
# into its own temporary file, and replaces them with the output of the
# external diff program run over those files.
class DiffExecutingFilter < BufferingFilter
  include ExternalPrograms

  # Installs the whole-stream step: run diff, and refuse to continue when it
  # produced no output at all.
  #
  # Raises StreamsEqualException (from the step, on first read) when the diff
  # output is empty.
  def initialize
    super do |tempfiles|
      diff_output = diff(tempfiles)
      raise StreamsEqualException.new if diff_output.eof?
      diff_output
    end
  end

  protected

  # Same idea as the parent's lazy upstream, except that the attached
  # upstream is a collection of streams and each one is buffered separately.
  def upstream
    return @upstream unless @first_time

    buffers = @upstream.collect { |stream| buffer_as_tempfile(stream) }
    @upstream = InputSource.new(nil, filter_all(buffers))
    @first_time = false
    @upstream
  end
end
49
+
50
# Raised when a diff produced no output, i.e. the compared streams are equal.
#
# NOTE(review): this derives from Exception rather than StandardError, so a
# bare `rescue` will not catch it -- callers must rescue it by name.
class StreamsEqualException < Exception
  # Takes no arguments; the message is fixed.
  def initialize
    super "The streams are equal"
  end
end
55
+
56
# One change block of a diff, collecting the lines of both sides.
#
# A difference is created from the left and right line ranges named on the
# diff command line and is then fed the following output lines one at a time
# through #consume until #complete? reports that enough lines were seen.
# How many lines that is depends on the kind of change, so subclasses
# supply +lines_to_consume+.
class Difference
  attr_reader :left_contents, :right_contents

  # left_range, right_range - ranges of line numbers on each side.
  def initialize(left_range, right_range)
    @left_range = left_range
    @right_range = right_range
    @left_contents = []
    @right_contents = []
    @consumed_lines = 0
  end

  # True once exactly +lines_to_consume+ lines have been consumed.
  def complete?
    @consumed_lines == lines_to_consume
  end

  # Takes one diff output line. Lines starting with "<" go to the left side
  # and lines starting with ">" to the right side, in both cases without
  # their two-character prefix. Any other line is only counted.
  #
  # Returns the number of lines consumed so far.
  def consume(line)
    case line
    when /\A</ then @left_contents << line[2..-1]
    when /\A>/ then @right_contents << line[2..-1]
    end
    @consumed_lines += 1
  end

  protected

  # Number of lines covered by the left range.
  def left_range_length
    @left_range.to_a.size
  end

  # Number of lines covered by the right range.
  def right_range_length
    @right_range.to_a.size
  end
end

# A changed block: expects all left lines, all right lines, and one extra
# line between them.
class ConflictDifference < Difference
  protected

  def lines_to_consume
    1 + left_range_length + right_range_length
  end
end

# An added block: expects only the right-hand lines.
class AdditionDifference < Difference
  protected

  def lines_to_consume
    right_range_length
  end
end

# A deleted block: expects only the left-hand lines.
class DeletionDifference < Difference
  protected

  def lines_to_consume
    left_range_length
  end
end
109
+
110
# Turns the text output of diff into Difference objects.
#
# Each record handed to the filter is a diff command line such as "3c3",
# "5a6,7" or "10,12d9". The filter builds the matching Difference and then
# keeps reading upstream lines into it until the difference is complete.
class DiffParsingFilter < Filter
  # <left start>[,<left end>]<a|c|d><right start>[,<right end>], where every
  # line number may have any number of digits.
  COMMAND_PATTERN = /\A(\d+)(?:,(\d+))?([acd])(\d+)(?:,(\d+))?\Z/

  def initialize
    super do |record|
      difference = make_difference_from(record)
      difference.consume(upstream.read) until difference.complete?
      difference
    end
  end

  protected
  # Builds the Difference subclass that corresponds to the command letter.
  #
  # Raises RuntimeError when the command line cannot be parsed.
  def make_difference_from(command_record)
    left, command, right = split(command_record)
    case command
    when "c" then ConflictDifference.new(left, right)
    when "a" then AdditionDifference.new(left, right)
    when "d" then DeletionDifference.new(left, right)
    else raise "Cannot parse diff line: #{command_record}"
    end
  end

  # Splits a diff command line into [left_range, command, right_range].
  # A side given as a single number becomes a one-line range.
  #
  # Raises RuntimeError when the line is not a diff command.
  def split(command_record)
    match = COMMAND_PATTERN.match(command_record)
    raise "Cannot parse diff line: #{command_record}" unless match

    left_start, left_end, command, right_start, right_end = match.captures
    left_range = left_start.to_i..(left_end || left_start).to_i
    right_range = right_start.to_i..(right_end || right_start).to_i
    [left_range, command, right_range]
  end
end
137
+ end
138
+ end
139
+ end
@@ -0,0 +1,109 @@
1
module AsciiDataTools
  module Formatting
    # Renders records as numbered, human-readable text blocks.
    #
    # The record's type is decorated with FormatableType on demand and then
    # asked to format the record's values; the formatter itself only keeps
    # the running record number.
    class Formatter
      def initialize
        @record_counter = 0
      end

      # record - object responding to +type+ and +values+.
      #
      # Returns the formatted text for the record. Each call uses the next
      # record number, starting at 1.
      def format(record)
        formattable(record.type).format(next_record_counter_and_increment, record.values)
      end
      alias :transform :format

      protected
      def next_record_counter_and_increment
        @record_counter += 1
      end

      # Makes sure the type can format itself and returns the type.
      def formattable(record_type)
        record_type.extend(FormatableType) unless record_type.is_a?(FormatableType)
        # Return the type explicitly: the conditional above yields nil when
        # the type was already decorated.
        record_type
      end
    end

    # Mixed into a record type to let it format its own records. The host
    # must respond to +name+, +fields+ and +length_of_longest_field_name+.
    module FormatableType
      # record_number - number shown in the header line.
      # values        - one value per field, in field order.
      #
      # Returns the formatted record: a header line, one line per field, and
      # a trailing blank line.
      def format(record_number, values)
        template % ([record_number] + escape_last(values))
      end

      protected
      # The format string for a whole record, built once per type.
      def template
        @template ||= [header_template, field_templates, footer].flatten.join("\n")
      end

      def header_template
        "Record %02d (#{name})"
      end

      # One format string per field, each with a single %s for the value.
      def field_templates
        make_fields_formatable_if_necessary
        widest_field = longest_field_length
        field_count = fields.length
        # each_with_index.map instead of enum_with_index, which no longer
        # exists from Ruby 1.9 on.
        fields.each_with_index.map do |field, index|
          field.template(:position => index + 1,
                         :longest_field_name => length_of_longest_field_name,
                         :longest_field_length => widest_field,
                         :last_field? => (index + 1 == field_count))
        end
      end

      def make_fields_formatable_if_necessary
        fields.each {|field| field.extend(FormatableField) unless field.is_a?(FormatableField) }
      end

      # Largest field length, or 0 when there are no fields or when some
      # field does not respond to +length+.
      def longest_field_length
        return 0 unless fields.all? {|field| field.respond_to?(:length)}
        fields.map {|field| field.length }.max || 0
      end

      # Returns a copy of values in which the newlines of the last value are
      # written as a literal backslash-n, so the value stays on one line.
      def escape_last(values)
        new_values = values.clone
        return new_values if new_values.empty?
        new_values[-1] = values[-1].gsub("\n", "\\n")
        new_values
      end

      def footer
        "\n"
      end
    end

    # Variant of FormatableType whose header carries no record number.
    module UnnumberedFormatableType
      include FormatableType

      # record_number is accepted for interface compatibility and ignored.
      def format(record_number, values)
        template % escape_last(values)
      end

      protected
      def header_template
        "Record (#{name})"
      end
    end

    # Mixed into a field to let it produce its own line of the record
    # template. The host must respond to +name+; when it also responds to
    # +length+, the trailing dashes are sized so that lines end in a
    # common column.
    module FormatableField
      PAD_CHARACTER = "-"
      MINIMUM_NUMBER_OF_DASHES = 5

      # properties - Hash with :position, :longest_field_name,
      #              :longest_field_length and :last_field?.
      #
      # Returns the format string for this field's line; the value's %s is
      # left in place for the record-level formatting step.
      def template(properties)
        "%02d %s : [%s]%s" % [ properties[:position],
                               padded_field_name(name, properties[:longest_field_name]),
                               "%s",
                               padding(properties)
                             ]
      end

      protected
      def padded_field_name(field_name, length_of_longest_field_name)
        field_name.ljust(length_of_longest_field_name)
      end

      def padding(properties)
        PAD_CHARACTER * number_of_pad_chars(properties)
      end

      # The last field gets one dash fewer than the others.
      def number_of_pad_chars(properties)
        return MINIMUM_NUMBER_OF_DASHES unless self.respond_to?(:length)
        MINIMUM_NUMBER_OF_DASHES + properties[:longest_field_length] - length - (properties[:last_field?] ? 1 : 0)
      end
    end
  end
end
@@ -0,0 +1,21 @@
1
module AsciiDataTools
  class << self
    # Evaluates the block in the context of the shared record type
    # repository, so the block can call the repository's methods directly.
    def configure(&block)
      record_types.instance_eval(&block)
    end

    # The shared record type repository, created on first use.
    def record_types
      @record_types ||= RecordType::RecordTypeRepository.new
    end

    # Loads every 'ascii-data-tools/discover.rb' file that RubyGems can find,
    # taking files only from the newest version of each gem.
    #
    # Returns the Array of loaded paths.
    def autodiscover
      require 'rubygems'

      configuration_files_from_newest_gem_versions.each {|f| load f}
    end

    private

    # Finds the discovery files to load.
    #
    # Gem.latest_load_paths is not available on current RubyGems, so the
    # lookup prefers Gem.find_latest_files where it exists and keeps the
    # original filtering only for RubyGems versions that still provide
    # latest_load_paths. If neither is available, every match is returned.
    def configuration_files_from_newest_gem_versions
      glob = 'ascii-data-tools/discover.rb'
      return Gem.find_latest_files(glob) if Gem.respond_to?(:find_latest_files)
      return Gem.find_files(glob) unless Gem.respond_to?(:latest_load_paths)

      Gem.find_files(glob).select do |path|
        Gem.latest_load_paths.any? {|load_path| path.include?(load_path)} || !path.include?(Gem.default_dir)
      end
    end
  end
end
+ end