ascii-data-tools 0.9

Sign up to get free protection for your applications and to get access to all the features.
Files changed (70) hide show
  1. data/.gitignore +3 -0
  2. data/.rvmrc +1 -0
  3. data/.travis.yml +4 -0
  4. data/Gemfile +3 -0
  5. data/Gemfile.lock +40 -0
  6. data/LICENSE.GPL2 +339 -0
  7. data/README.rdoc +52 -0
  8. data/Rakefile +42 -0
  9. data/TODO +4 -0
  10. data/ascii-data-tools.gemspec +30 -0
  11. data/bin/ascii-data-cat +13 -0
  12. data/bin/ascii-data-edit +13 -0
  13. data/bin/ascii-data-norm +13 -0
  14. data/bin/ascii-data-qdiff +13 -0
  15. data/bin/ascii-data-tools-config +9 -0
  16. data/examples/big +10000 -0
  17. data/examples/built_in_records.gz +0 -0
  18. data/examples/slightly_modified_built_in_records.gz +0 -0
  19. data/features/ascii-data-cat.feature +110 -0
  20. data/features/ascii-data-edit.feature +91 -0
  21. data/features/ascii-data-qdiff.feature +54 -0
  22. data/features/encoding_decoding.feature +68 -0
  23. data/features/normaliser.feature +27 -0
  24. data/features/plugins.feature +73 -0
  25. data/features/record_recognition.feature +61 -0
  26. data/features/step_definitions/ascii-data-cat_steps.rb +48 -0
  27. data/features/step_definitions/ascii-data-edit_steps.rb +38 -0
  28. data/features/step_definitions/ascii-data-norm_steps.rb +7 -0
  29. data/features/step_definitions/ascii-data-qdiff_steps.rb +43 -0
  30. data/features/step_definitions/encoding_decoding_steps.rb +23 -0
  31. data/features/step_definitions/plugins_steps.rb +11 -0
  32. data/features/step_definitions/record_recognition_steps.rb +10 -0
  33. data/features/support/env.rb +5 -0
  34. data/lib/ascii-data-tools.rb +8 -0
  35. data/lib/ascii-data-tools/configuration.rb +169 -0
  36. data/lib/ascii-data-tools/configuration_printer.rb +38 -0
  37. data/lib/ascii-data-tools/controller.rb +123 -0
  38. data/lib/ascii-data-tools/discover.rb +19 -0
  39. data/lib/ascii-data-tools/external_programs.rb +23 -0
  40. data/lib/ascii-data-tools/filter.rb +148 -0
  41. data/lib/ascii-data-tools/filter/diffing.rb +139 -0
  42. data/lib/ascii-data-tools/formatting.rb +109 -0
  43. data/lib/ascii-data-tools/global_autodiscovery.rb +21 -0
  44. data/lib/ascii-data-tools/record.rb +50 -0
  45. data/lib/ascii-data-tools/record_type.rb +139 -0
  46. data/lib/ascii-data-tools/record_type/builder.rb +50 -0
  47. data/lib/ascii-data-tools/record_type/decoder.rb +77 -0
  48. data/lib/ascii-data-tools/record_type/encoder.rb +17 -0
  49. data/lib/ascii-data-tools/record_type/field.rb +168 -0
  50. data/lib/ascii-data-tools/record_type/normaliser.rb +38 -0
  51. data/lib/ascii-data-tools/ruby_extensions.rb +7 -0
  52. data/lib/ascii-data-tools/version.rb +3 -0
  53. data/spec/ascii-data-tools/configuration_printer_spec.rb +51 -0
  54. data/spec/ascii-data-tools/configuration_spec.rb +153 -0
  55. data/spec/ascii-data-tools/discover_spec.rb +8 -0
  56. data/spec/ascii-data-tools/filter/diffing_spec.rb +82 -0
  57. data/spec/ascii-data-tools/filter_spec.rb +107 -0
  58. data/spec/ascii-data-tools/formatting_spec.rb +106 -0
  59. data/spec/ascii-data-tools/record_spec.rb +49 -0
  60. data/spec/ascii-data-tools/record_type/builder_spec.rb +69 -0
  61. data/spec/ascii-data-tools/record_type/decoder_spec.rb +73 -0
  62. data/spec/ascii-data-tools/record_type/encoder_spec.rb +32 -0
  63. data/spec/ascii-data-tools/record_type/field_spec.rb +160 -0
  64. data/spec/ascii-data-tools/record_type/normaliser_spec.rb +25 -0
  65. data/spec/ascii-data-tools/record_type_spec.rb +175 -0
  66. data/spec/filter_helper.rb +24 -0
  67. data/spec/record_type_helpers.rb +8 -0
  68. data/spec/spec.opts +2 -0
  69. data/spec/spec_helper.rb +5 -0
  70. metadata +196 -0
@@ -0,0 +1,19 @@
1
# Example record type definitions registered through AsciiDataTools.configure.
#
# Each record type lists its fields in order; every field here has a fixed
# :length. The leading RECORD_TYPE field is constrained to the type's own name.
AsciiDataTools.configure do
  record_type "EXAMPLE01" do
    field("RECORD_TYPE", :length => 9, :constrained_to => "EXAMPLE01")
    field("USAGE", :length => 3)
    field("A_NUMBER", :length => 16)
    field("B_NUMBER", :length => 16)
    field("CHARGEABLE_UNITS", :length => 4)
    field("END_OF_RECORD", :length => 1)
  end

  record_type "EXAMPLE02" do
    field("RECORD_TYPE", :length => 9, :constrained_to => "EXAMPLE02")
    field("APN", :length => 12)
    # TIMESTAMP is the only field flagged :normalised in these examples.
    field("TIMESTAMP", :length => 14, :normalised => true)
    field("SESSION_DURATION", :length => 4)
    field("CHARGEABLE_UNITS", :length => 4)
    field("END_OF_RECORD", :length => 1)
  end
end
@@ -0,0 +1,23 @@
1
require 'shellwords'

module AsciiDataTools
  # Thin wrappers around the external command line programs (touch, diff,
  # sort, vimdiff) used by the tools.
  #
  # Every path is passed through Shellwords.escape before being placed in a
  # command string, so paths containing spaces or shell metacharacters are
  # handed to the program as a single argument. For ordinary paths the
  # generated command strings are unchanged.
  module ExternalPrograms
    # Sets both the access and modification time of the file at +self.path+.
    #
    # new_mtime:: an object responding to +strftime+ (e.g. a Time).
    # Returns whatever Kernel.system returns for the touch command.
    def modify_file_mtime_to(new_mtime)
      timestamp = new_mtime.strftime('%Y%m%d%H%M.%S')
      Kernel.system("touch -am -t #{timestamp} #{Shellwords.escape(self.path)}")
    end

    # Runs diff over the given files and returns the IO for its output.
    #
    # files:: objects responding to +path+.
    def diff(files)
      IO.popen(diff_command_for(files))
    end

    # Builds the diff command line for the given files.
    #
    # files:: objects responding to +path+.
    # Returns the command as a single String.
    def diff_command_for(files)
      "diff " + files.map { |file| Shellwords.escape(file.path) }.join(' ')
    end

    # Sorts the contents of +input_file+ into +output_file+ using sort(1).
    #
    # Both arguments must respond to +path+.
    # Returns whatever Kernel.system returns for the sort command.
    def sort(input_file, output_file)
      source = Shellwords.escape(input_file.path)
      target = Shellwords.escape(output_file.path)
      Kernel.system("sort #{source} > #{target}")
    end

    # Opens the given file names in vimdiff.
    #
    # filenames:: an Array of path Strings.
    def edit_differences(filenames)
      escaped = filenames.map { |filename| Shellwords.escape(filename) }
      Kernel.system("vimdiff #{escaped.join(' ')}")
    end
  end
end
@@ -0,0 +1,148 @@
1
+ require 'tempfile'
2
+ require 'ascii-data-tools/external_programs'
3
+
4
+ module AsciiDataTools
5
+ module Filter
6
# One stage of a pull-based record pipeline.
#
# A filter is attached to an upstream source with <<. Each #read pulls one
# record from upstream and passes it through the block given at construction.
# The upstream object must respond to +read+ and +has_records?+.
class Filter
  # block:: called with each upstream record; its result is what #read returns.
  def initialize(&block)
    @filter_block = block
  end

  # Attaches +upstream_filter+ as the source of records.
  # Returns self so that stages can be chained.
  def <<(upstream_filter)
    @upstream = upstream_filter
    self
  end

  # Reads one record from upstream and returns its filtered form.
  def read
    incoming = upstream.read
    filter(incoming)
  end

  # Appends every remaining filtered record to +output_stream+ using <<.
  def write(output_stream)
    while has_records?
      output_stream << read
    end
  end

  # True while the upstream source reports more records.
  def has_records?
    upstream.has_records?
  end

  protected

  # Applies the construction-time block to a single record.
  def filter(record)
    @filter_block.call(record)
  end

  # The source attached with <<; subclasses may override this.
  def upstream
    @upstream
  end
end
37
+
38
# A filter that needs the whole upstream content at once.
#
# On first access to #upstream the original source is drained into a
# Tempfile, the tempfile is handed to #filter_all, and the result is wrapped
# in an InputSource that replaces the original upstream. Individual records
# are then passed through unchanged.
class BufferingFilter < Filter
  # block:: called once with the buffered tempfile; its result is given to
  #         InputSource.new as the new upstream content.
  def initialize(&block)
    @filter_all_block = block
    @first_time = true
  end

  protected

  # Transforms the fully buffered input; delegates to the construction block.
  def filter_all(tempfile)
    @filter_all_block.call(tempfile)
  end

  # Records are passed through untouched; the work happens in #filter_all.
  def filter(record)
    record
  end

  # Lazily swaps the attached upstream for the buffered and transformed one.
  def upstream
    return @upstream unless @first_time

    buffered = buffer_as_tempfile(@upstream)
    @upstream = InputSource.new(nil, filter_all(buffered))
    @first_time = false
    @upstream
  end

  # Drains +stream+ into a new Tempfile and returns the result of reopening it.
  def buffer_as_tempfile(stream)
    buffer = Tempfile.new("filter")
    while stream.has_records?
      buffer << stream.read
    end
    buffer.open
  end
end
68
+
69
# Turns raw ascii records into their formatted, human readable form.
class FormattingFilter < Filter
  # filename::        passed to the type determiner alongside each record.
  # type_determiner:: object responding to +determine_type_for+.
  def initialize(filename, type_determiner)
    @filename = filename
    @type_determiner = type_determiner
    @formatter = Formatting::Formatter.new
  end

  # Determines the type of +record+, decodes it with that type and returns
  # the formatter's output for the decoded record.
  def filter(record)
    encoded = { :ascii_string => record, :filename => @filename }
    record_type = @type_determiner.determine_type_for(encoded)
    @formatter.format(record_type.decode(encoded))
  end
end
83
+
84
# Normalises raw ascii records according to their determined record type.
class NormalisingFilter < Filter
  # filename::        passed to the type determiner alongside each record.
  # type_determiner:: object responding to +determine_type_for+.
  def initialize(filename, type_determiner)
    @type_determiner = type_determiner
    @filename = filename
  end

  # Looks up the type for +record+ and returns that type's normalised form
  # of the record.
  def filter(record)
    lookup = { :ascii_string => record, :filename => @filename }
    @type_determiner.determine_type_for(lookup).normalise(record)
  end
end
95
+
96
# Buffers the whole upstream and sorts it with the external sort program.
class SortingFilter < BufferingFilter
  include ExternalPrograms

  # Sorts the buffered +tempfile+ into a second tempfile.
  #
  # Both tempfiles are closed before the external program runs.
  # Returns the result of reopening the sorted tempfile.
  def filter_all(tempfile)
    tempfile.close

    sorted = Tempfile.new("sort")
    sorted.close

    sort(tempfile, sorted)
    sorted.open
  end
end
108
+
109
# Parses formatted records back into Record objects.
#
# The expected input is a header line of the form "Record NN (TYPE_NAME)",
# followed by one line per content field with the value in square brackets,
# followed by one more line that is read and discarded.
class ParsingFilter < Filter
  # record_types:: repository responding to +find_by_name+.
  def initialize(record_types)
    @record_types = record_types
  end

  # Builds a Record from the header line +record+ plus the field lines that
  # are pulled from upstream.
  #
  # Raises RuntimeError when the header cannot be parsed, the record type is
  # unknown, or a field line is missing or malformed.
  def filter(record)
    record_type = identify_record_type_from(record)
    values = parse_values_from_subsequent_lines(record_type)

    consume_empty_line_between_records

    AsciiDataTools::Record::Record.new(record_type, values)
  end

  protected

  # Extracts the type name from the header line and looks it up.
  def identify_record_type_from(header_line)
    match = header_line.match(/Record \d+ \((.*?)\)/)
    raise "Cannot find record type in line #{header_line}!" unless match

    expected_record_type_name = match[1]
    record_type = @record_types.find_by_name(expected_record_type_name)
    # Fail with a readable message instead of a NoMethodError on nil later on.
    raise "Unknown record type #{expected_record_type_name} in line #{header_line}!" unless record_type
    record_type
  end

  # Reads one upstream line per content field and returns the parsed values.
  def parse_values_from_subsequent_lines(record_type)
    (1..record_type.number_of_content_fields).map { parse_value_from(upstream.read) }
  end

  # Returns the text between the first "[" and the LAST "]" of +line+, with
  # escaped newlines restored.
  #
  # The match is greedy on purpose: a value may itself contain "]", and only
  # the final "]" on the line closes the value.
  def parse_value_from(line)
    match = line && line.match(/.*?\[(.*)\]/)
    raise "Cannot parse field value from line #{line.inspect}!" unless match

    match[1].gsub("\\n", "\n")
  end

  # Skips the line that follows the last field of a record.
  def consume_empty_line_between_records
    upstream.read
  end
end
147
+ end
148
+ end
@@ -0,0 +1,139 @@
1
+ require 'ascii-data-tools/external_programs'
2
+
3
+ module AsciiDataTools
4
+ module Filter
5
+ module Diffing
6
# Formats the two sides of each difference onto two separate output streams.
#
# Records are formatted without record numbers: the formatter created by the
# superclass is given a singleton +formattable+ that mixes
# Formatting::UnnumberedFormatableType into each record type.
class DiffFormattingFilter < FormattingFilter
  # Written to both streams between consecutive differences.
  DIFF_DELIMITER = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n\n"

  # type_determiner:: object responding to +determine_type_for+.
  def initialize(type_determiner)
    super(nil, type_determiner)
    def @formatter.formattable(record_type)
      # is_a? (not instance_of?) is the check that can actually detect a
      # mixed-in module; the type is always returned so the caller can
      # call #format on it whether or not it needed extending.
      unless record_type.is_a?(Formatting::UnnumberedFormatableType)
        record_type.extend(Formatting::UnnumberedFormatableType)
      end
      record_type
    end
  end

  # Writes every upstream difference to the given streams.
  #
  # streams:: the first stream receives the formatted left-hand records, the
  #           second the formatted right-hand records. DIFF_DELIMITER is
  #           appended to both between differences, but not after the last.
  def write(*streams)
    left_stream, right_stream = streams[0], streams[1]

    while upstream.has_records?
      difference = upstream.read
      difference.left_contents.each { |rec| left_stream << filter(rec) }
      difference.right_contents.each { |rec| right_stream << filter(rec) }

      if upstream.has_records?
        left_stream << DIFF_DELIMITER
        right_stream << DIFF_DELIMITER
      end
    end
  end
end
28
+
29
# Buffers several upstream sources and runs the external diff over them.
#
# The attached upstream is expected to be a collection of streams; each one
# is buffered into its own tempfile before diff is invoked.
class DiffExecutingFilter < BufferingFilter
  include ExternalPrograms

  # Raises StreamsEqualException (on first read) when diff produces no output.
  def initialize
    super do |tempfiles|
      diff_output = diff(tempfiles)
      raise StreamsEqualException.new if diff_output.eof?
      diff_output
    end
  end

  protected

  # Lazily replaces the collection of streams with an InputSource over the
  # diff output.
  def upstream
    return @upstream unless @first_time

    buffered = @upstream.map { |stream| buffer_as_tempfile(stream) }
    @upstream = InputSource.new(nil, filter_all(buffered))
    @first_time = false
    @upstream
  end
end
49
+
50
# Raised when the compared streams turn out to be identical.
#
# Note that this derives directly from Exception, so a bare +rescue+ (which
# only handles StandardError) does not catch it.
class StreamsEqualException < Exception
  # Takes no arguments; the message is fixed.
  def initialize
    super "The streams are equal"
  end
end
55
+
56
# One change block from diff output: the lines removed from the left file
# and the lines added from the right file.
#
# Subclasses supply +lines_to_consume+, the number of output lines that
# belong to the block; #complete? compares it to the lines consumed so far.
class Difference
  attr_reader :left_contents, :right_contents

  # left_range, right_range:: line ranges taken from the diff command line.
  def initialize(left_range, right_range)
    @left_range = left_range
    @right_range = right_range
    @left_contents = []
    @right_contents = []
    @consumed_lines = 0
  end

  # True once as many lines were consumed as the block contains.
  def complete?
    lines_to_consume == @consumed_lines
  end

  # Consumes one line of diff output.
  #
  # Lines starting with "<" go to the left contents and lines starting with
  # ">" to the right contents, in both cases without their first two
  # characters. Any other line is only counted.
  # Returns the number of lines consumed so far.
  def consume(line)
    case line
    when /\A</ then @left_contents << line[2..-1]
    when /\A>/ then @right_contents << line[2..-1]
    end
    @consumed_lines += 1
  end

  protected

  # Number of lines covered by the left range.
  def left_range_length
    @left_range.to_a.size
  end

  # Number of lines covered by the right range.
  def right_range_length
    @right_range.to_a.size
  end
end

# A changed block: lines from both sides plus one extra line
# (presumably the "---" separator diff prints between the two sides).
class ConflictDifference < Difference
  protected

  def lines_to_consume
    1 + left_range_length + right_range_length
  end
end

# An added block: only right-hand lines follow the command line.
class AdditionDifference < Difference
  protected

  def lines_to_consume
    right_range_length
  end
end

# A deleted block: only left-hand lines follow the command line.
class DeletionDifference < Difference
  protected

  def lines_to_consume
    left_range_length
  end
end
109
+
110
# Turns the output of diff (normal format) into Difference objects.
#
# Each upstream record handed to the filter is expected to be a change
# command line such as "3c3", "5,7d4" or "12a13,15". The lines belonging to
# that change are then pulled from upstream until the difference is complete.
class DiffParsingFilter < Filter
  # Matches "<start>[,<end>]<a|c|d><start>[,<end>]" with line numbers of any
  # length. Captures: left start, left end, command, right start, right end.
  COMMAND_PATTERN = /^(\d+)(?:,(\d+))?([acd])(\d+)(?:,(\d+))?\Z/

  def initialize
    super do |record|
      difference = make_difference_from(record)
      difference.consume(upstream.read) until difference.complete?
      difference
    end
  end

  protected

  # Builds the Difference subclass matching the command letter.
  #
  # Raises RuntimeError when +command_record+ is not a diff command line.
  def make_difference_from(command_record)
    left, command, right = split(command_record)
    case command
    when "c" then ConflictDifference.new(left, right)
    when "a" then AdditionDifference.new(left, right)
    when "d" then DeletionDifference.new(left, right)
    else raise "Cannot parse diff line: #{command_record}"
    end
  end

  # Splits a command line into [left_range, command, right_range].
  #
  # A side given as a single line number becomes a one-element range.
  # Raises RuntimeError when the line does not match COMMAND_PATTERN.
  def split(command_record)
    match = COMMAND_PATTERN.match(command_record.to_s)
    raise "Cannot parse diff line: #{command_record}" unless match

    left_start, left_end, command, right_start, right_end = match.captures
    left_range = left_start.to_i..(left_end || left_start).to_i
    right_range = right_start.to_i..(right_end || right_start).to_i
    [left_range, command, right_range]
  end
end
137
+ end
138
+ end
139
+ end
@@ -0,0 +1,109 @@
1
+ module AsciiDataTools
2
+ module Formatting
3
# Formats decoded records, numbering them in the order they are formatted.
class Formatter
  def initialize
    @record_counter = 0
  end

  # Returns the formatted text for +record+.
  #
  # record:: object responding to +type+ and +values+. The record's type is
  #          extended with FormatableType if it does not already have it.
  # The first record formatted gets number 1.
  def format(record)
    formattable(record.type).format(next_record_counter_and_increment, record.values)
  end
  alias :transform :format

  protected

  # Increments the counter and returns the new value.
  def next_record_counter_and_increment
    @record_counter += 1
  end

  # Ensures +record_type+ can format records and returns it.
  #
  # is_a? is used because instance_of? never reports a mixed-in module, which
  # caused the type to be re-extended for every record. The type is returned
  # explicitly so the result does not depend on whether extend was needed.
  def formattable(record_type)
    record_type.extend(FormatableType) unless record_type.is_a?(FormatableType)
    record_type
  end
end
22
+
23
# Mixin that lets a record type render its records as numbered text.
#
# The extended object must provide +name+, +fields+ and
# +length_of_longest_field_name+. Fields are extended with FormatableField
# on demand.
module FormatableType
  # Renders one record.
  #
  # record_number:: number shown in the header line.
  # values::        field values in field order; newlines in the last value
  #                 are escaped. The array itself is not modified.
  # Returns the formatted String.
  def format(record_number, values)
    template % ([record_number] + escape_last(values))
  end

  protected

  # Format string for a whole record; built once and memoised.
  def template
    @template ||= [header_template, field_templates, footer].flatten.join("\n")
  end

  def header_template
    "Record %02d (#{name})"
  end

  # One format string per field, in field order.
  def field_templates
    make_fields_formatable_if_necessary
    # each_with_index.map replaces enum_with_index, which no longer exists
    # from Ruby 1.9 onwards.
    fields.each_with_index.map do |field, index|
      field.template(:position => index + 1,
                     :longest_field_name => length_of_longest_field_name,
                     :longest_field_length => longest_field_length,
                     :last_field? => (index + 1 == fields.length))
    end
  end

  def make_fields_formatable_if_necessary
    fields.each { |field| field.extend(FormatableField) unless field.is_a?(FormatableField) }
  end

  # Largest field length, or 0 when any field has no +length+ (or when there
  # are no fields at all).
  def longest_field_length
    return 0 unless fields.all? { |field| field.respond_to?(:length) }

    fields.map { |field| field.length }.max || 0
  end

  # Returns a copy of +values+ with newlines in the last value escaped.
  # An empty list is returned as an empty copy.
  def escape_last(values)
    # dup rather than clone: a frozen input must still yield a writable copy.
    new_values = values.dup
    new_values[-1] = values[-1].gsub("\n", "\\n") unless values.empty?
    new_values
  end

  def footer
    "\n"
  end
end
69
+
70
# Variant of FormatableType whose output carries no record number.
module UnnumberedFormatableType
  include FormatableType

  # Renders one record; +record_number+ is accepted for interface
  # compatibility but not used.
  def format(record_number, values)
    escaped_values = escape_last(values)
    template % escaped_values
  end

  protected

  # Header line without the numeric placeholder.
  def header_template
    "Record (#{name})"
  end
end
82
+
83
# Mixin that lets a field produce its line of a record template.
#
# The extended object must provide +name+; +length+ is optional.
module FormatableField
  PAD_CHARACTER = "-"
  MINIMUM_NUMBER_OF_DASHES = 5

  # Builds the format string for this field's line. The value placeholder
  # ("%s") is left in place for the record type to fill in later.
  #
  # properties:: Hash with :position, :longest_field_name,
  #              :longest_field_length and :last_field?.
  def template(properties)
    position = properties[:position]
    label = padded_field_name(name, properties[:longest_field_name])
    "%02d %s : [%s]%s" % [position, label, "%s", padding(properties)]
  end

  protected

  def padded_field_name(field_name, length_of_longest_field_name)
    field_name.ljust(length_of_longest_field_name)
  end

  def padding(properties)
    PAD_CHARACTER * number_of_pad_chars(properties)
  end

  # Number of pad characters after the closing bracket.
  #
  # Fields without a +length+ get the minimum. Otherwise shorter fields get
  # more padding, and the last field gets one character less. The result is
  # clamped at zero: when :longest_field_length is smaller than this field's
  # own length the raw figure is negative, and String#* raises on that.
  def number_of_pad_chars(properties)
    return MINIMUM_NUMBER_OF_DASHES unless respond_to?(:length)

    last_field_adjustment = properties[:last_field?] ? 1 : 0
    pad = MINIMUM_NUMBER_OF_DASHES + properties[:longest_field_length] - length - last_field_adjustment
    pad < 0 ? 0 : pad
  end
end
108
+ end
109
+ end
@@ -0,0 +1,21 @@
1
module AsciiDataTools
  # Evaluates +block+ in the context of the shared record type repository,
  # so the block can call the repository's methods directly.
  def self.configure(&block)
    record_types.instance_eval(&block)
  end

  # The shared record type repository, created on first use.
  def self.record_types
    @record_types ||= RecordType::RecordTypeRepository.new
  end

  # Loads every 'ascii-data-tools/discover.rb' found by RubyGems that either
  # lies under one of Gem.latest_load_paths or lies outside Gem.default_dir.
  #
  # NOTE(review): Gem.latest_load_paths may not exist in newer RubyGems
  # releases - confirm against the RubyGems versions this must support.
  def self.autodiscover
    require 'rubygems'

    candidates = Gem.find_files('ascii-data-tools/discover.rb')
    configuration_files_from_newest_gem_versions = candidates.select do |path|
      in_latest_gem = Gem.latest_load_paths.any? { |load_path| path.include?(load_path) }
      in_latest_gem || !path.include?(Gem.default_dir)
    end

    configuration_files_from_newest_gem_versions.each { |file| load file }
  end
end