ascii-data-tools 0.9

Sign up to get free protection for your applications and to get access to all the features.
Files changed (70) hide show
  1. data/.gitignore +3 -0
  2. data/.rvmrc +1 -0
  3. data/.travis.yml +4 -0
  4. data/Gemfile +3 -0
  5. data/Gemfile.lock +40 -0
  6. data/LICENSE.GPL2 +339 -0
  7. data/README.rdoc +52 -0
  8. data/Rakefile +42 -0
  9. data/TODO +4 -0
  10. data/ascii-data-tools.gemspec +30 -0
  11. data/bin/ascii-data-cat +13 -0
  12. data/bin/ascii-data-edit +13 -0
  13. data/bin/ascii-data-norm +13 -0
  14. data/bin/ascii-data-qdiff +13 -0
  15. data/bin/ascii-data-tools-config +9 -0
  16. data/examples/big +10000 -0
  17. data/examples/built_in_records.gz +0 -0
  18. data/examples/slightly_modified_built_in_records.gz +0 -0
  19. data/features/ascii-data-cat.feature +110 -0
  20. data/features/ascii-data-edit.feature +91 -0
  21. data/features/ascii-data-qdiff.feature +54 -0
  22. data/features/encoding_decoding.feature +68 -0
  23. data/features/normaliser.feature +27 -0
  24. data/features/plugins.feature +73 -0
  25. data/features/record_recognition.feature +61 -0
  26. data/features/step_definitions/ascii-data-cat_steps.rb +48 -0
  27. data/features/step_definitions/ascii-data-edit_steps.rb +38 -0
  28. data/features/step_definitions/ascii-data-norm_steps.rb +7 -0
  29. data/features/step_definitions/ascii-data-qdiff_steps.rb +43 -0
  30. data/features/step_definitions/encoding_decoding_steps.rb +23 -0
  31. data/features/step_definitions/plugins_steps.rb +11 -0
  32. data/features/step_definitions/record_recognition_steps.rb +10 -0
  33. data/features/support/env.rb +5 -0
  34. data/lib/ascii-data-tools.rb +8 -0
  35. data/lib/ascii-data-tools/configuration.rb +169 -0
  36. data/lib/ascii-data-tools/configuration_printer.rb +38 -0
  37. data/lib/ascii-data-tools/controller.rb +123 -0
  38. data/lib/ascii-data-tools/discover.rb +19 -0
  39. data/lib/ascii-data-tools/external_programs.rb +23 -0
  40. data/lib/ascii-data-tools/filter.rb +148 -0
  41. data/lib/ascii-data-tools/filter/diffing.rb +139 -0
  42. data/lib/ascii-data-tools/formatting.rb +109 -0
  43. data/lib/ascii-data-tools/global_autodiscovery.rb +21 -0
  44. data/lib/ascii-data-tools/record.rb +50 -0
  45. data/lib/ascii-data-tools/record_type.rb +139 -0
  46. data/lib/ascii-data-tools/record_type/builder.rb +50 -0
  47. data/lib/ascii-data-tools/record_type/decoder.rb +77 -0
  48. data/lib/ascii-data-tools/record_type/encoder.rb +17 -0
  49. data/lib/ascii-data-tools/record_type/field.rb +168 -0
  50. data/lib/ascii-data-tools/record_type/normaliser.rb +38 -0
  51. data/lib/ascii-data-tools/ruby_extensions.rb +7 -0
  52. data/lib/ascii-data-tools/version.rb +3 -0
  53. data/spec/ascii-data-tools/configuration_printer_spec.rb +51 -0
  54. data/spec/ascii-data-tools/configuration_spec.rb +153 -0
  55. data/spec/ascii-data-tools/discover_spec.rb +8 -0
  56. data/spec/ascii-data-tools/filter/diffing_spec.rb +82 -0
  57. data/spec/ascii-data-tools/filter_spec.rb +107 -0
  58. data/spec/ascii-data-tools/formatting_spec.rb +106 -0
  59. data/spec/ascii-data-tools/record_spec.rb +49 -0
  60. data/spec/ascii-data-tools/record_type/builder_spec.rb +69 -0
  61. data/spec/ascii-data-tools/record_type/decoder_spec.rb +73 -0
  62. data/spec/ascii-data-tools/record_type/encoder_spec.rb +32 -0
  63. data/spec/ascii-data-tools/record_type/field_spec.rb +160 -0
  64. data/spec/ascii-data-tools/record_type/normaliser_spec.rb +25 -0
  65. data/spec/ascii-data-tools/record_type_spec.rb +175 -0
  66. data/spec/filter_helper.rb +24 -0
  67. data/spec/record_type_helpers.rb +8 -0
  68. data/spec/spec.opts +2 -0
  69. data/spec/spec_helper.rb +5 -0
  70. metadata +196 -0
@@ -0,0 +1,50 @@
1
+ require 'set'
2
+
3
+ module AsciiDataTools
4
+ module Record
5
# A decoded record: a record type together with the values of its content
# fields, kept in the same order as the type's field names.
class Record
  attr_reader :type

  # type           - the record type; this class calls #name, #field_names
  #                  and #encode on it.
  # content_values - the content field values, ordered like type.field_names.
  def initialize(type, content_values)
    @type = type
    @values_by_type = {:content => content_values}
  end

  # Returns the value stored for the field called requested_field_name.
  # Raises a RuntimeError when the type has no field with that name.
  def [](requested_field_name)
    pair = field_names_and_values.detect {|field_name, _value| field_name == requested_field_name }
    raise "Field name '#{requested_field_name}' does not exist!" if pair.nil?
    pair.last
  end

  # Name of the record's type.
  def type_name
    @type.name
  end

  # Returns an array of [field_name, value] pairs in field order.
  def to_a
    field_names_and_values
  end

  # The content values this record was built with.
  def values
    @values_by_type[:content]
  end

  # Asks the type to encode this record's values.
  def encode
    @type.encode(values)
  end

  # Renders the record as "<type name>: <field> => <inspected value>, ...".
  def to_s
    contents = field_names_and_values.map {|field_name, value| "#{field_name} => #{value.inspect}" }.join(", ")
    "#{type_name}: #{contents}"
  end

  # Two records are equal when both their types and their values are equal.
  # Objects that do not expose #type and #values (nil, strings, ...) are
  # simply unequal instead of triggering a NoMethodError.
  def ==(other)
    return false unless other.respond_to?(:type) && other.respond_to?(:values)
    type == other.type && values == other.values
  end

  protected

  # Pairs every field name of the type with the value at the same position.
  def field_names_and_values
    @type.field_names.zip(values)
  end
end
49
+ end
50
+ end
@@ -0,0 +1,139 @@
1
+ require 'set'
2
+ require 'forwardable'
3
+ require 'ascii-data-tools/record_type/field'
4
+ require 'ascii-data-tools/record_type/builder'
5
+ require 'ascii-data-tools/record_type/normaliser'
6
+ require 'ascii-data-tools/record_type/decoder'
7
+ require 'ascii-data-tools/record_type/encoder'
8
+
9
+ module AsciiDataTools
10
+ module RecordType
11
# Mixin for fixed-length record types: pulls in the fixed-length decoder and
# encoder plus the normaliser, and adds a helper for the combined field width.
module FixedLengthType
  include Decoder::FixedLengthRecordDecoder
  include Encoder::FixedLengthRecordEncoder
  include Normaliser::Normaliser

  # Sum of the lengths of all fields. The result is cached in @total_length
  # after the first call.
  def total_length_of_fields
    @total_length ||= fields.map {|field| field.length }.inject(0) {|total, length| total + length }
  end
end
20
+
21
# Mixin for CSV record types: combines the CSV record decoder and the CSV
# record encoder. TypeBuilder extends a type with this module when the
# "csv" family is requested.
+ module CsvType
22
+ include Decoder::CsvRecordDecoder
23
+ include Encoder::CsvRecordEncoder
24
+ end
25
+
26
# A named record type. It owns two groups of fields: the :content fields
# supplied by the caller and the :meta fields (:filename and :divider) that
# every type creates for itself.
class Type
  extend Forwardable
  attr_reader :name

  def_delegator :fields, :names, :field_names
  def_delegator :fields, :with_index, :field_with_index

  # name           - the name of the type.
  # content_fields - the content fields; defaults to an empty collection.
  def initialize(name, content_fields = Field::Fields.new)
    @name = name
    @fields_by_type = {:content => content_fields, :meta => make_meta_fields}
  end

  # Looks a field up by name among the content and the meta fields.
  def field_with_name(name)
    all_fields.with_name(name)
  end

  # Any message the type does not understand is forwarded to the content
  # fields collection.
  def method_missing(method_name, *args, &block)
    content_fields.send(method_name, *args, &block)
  end

  # Keeps respond_to? consistent with method_missing. Private methods are
  # included in the check because method_missing forwards with #send.
  def respond_to_missing?(method_name, include_private = false)
    content_fields.respond_to?(method_name, true) || super
  end

  # Constrains the :filename meta field to the given value.
  def filename_should_match(value)
    field_with_name(:filename).should_be_constrained_to(value)
  end

  protected

  attr_reader :fields_by_type

  def content_fields
    @fields_by_type[:content]
  end

  alias :fields :content_fields

  # Builds the meta fields: a plain :filename field and a constant :divider.
  def make_meta_fields
    Field::Fields.new([Field::Field.new(:filename), Field::ConstantField.new(:divider)])
  end

  # A fresh collection holding the content fields followed by the meta fields.
  def all_fields
    Field::Fields.new(@fields_by_type[:content] + @fields_by_type[:meta])
  end
end
67
+
68
# The type used for records that no configured type recognises. It is named
# "unknown" and has a single content field called "UNKNOWN".
class UnknownType < Type
  include Decoder::UnknownRecordDecoder
  UNKNOWN_RECORD_TYPE_NAME = "unknown"

  def initialize
    only_field = Field::Field.new("UNKNOWN")
    super(UNKNOWN_RECORD_TYPE_NAME, Field::Fields.new([only_field]))
  end
end
76
+
77
# Works out which record type can decode an encoded record. Types that
# matched before are remembered and consulted first.
class TypeDeterminer
  # type_repo - the repository holding every known type.
  def initialize(type_repo = RecordTypeRepository.new)
    @previously_matched_types = RecordTypeRepository.new
    @all_types = type_repo
  end

  # Returns the matching type, or a new UnknownType when nothing matches.
  # A matching type is added to the set of previously matched types.
  def determine_type_for(encoded_record)
    recognised = @previously_matched_types.identify_type_for(encoded_record)
    recognised ||= @all_types.identify_type_for(encoded_record)
    return UnknownType.new if recognised.nil?

    @previously_matched_types << recognised
    recognised
  end
end
95
+
96
# An enumerable set of record types that can also define new types through
# the TypeBuilder DSL.
class RecordTypeRepository
  include Enumerable
  include Builder::TypeBuilder

  # types - initial types; they are stored in a Set.
  def initialize(types = [])
    @types = Set.new(types)
  end

  # Adds a type to the repository.
  def <<(type)
    @types.add(type)
  end

  # Removes every type.
  def clear
    @types.clear
  end

  # Returns the first type with the given name, or nil.
  def find_by_name(name)
    find {|candidate| candidate.name == name }
  end

  alias_method :type, :find_by_name

  def each(&block)
    @types.each(&block)
  end

  # Returns the first type that reports it can decode the record, or nil.
  def identify_type_for(encoded_record)
    @types.detect {|candidate| candidate.able_to_decode?(encoded_record) }
  end

  # Calls the block with every type whose name matches. The matcher may be
  # a Regexp (tested with =~) or a Proc (called with the name); any other
  # matcher does nothing and yields nil.
  def for_names_matching(matcher, &block)
    matching_types =
      case matcher
      when Regexp then select {|candidate| candidate.name =~ matcher }
      when Proc then select {|candidate| matcher.call(candidate.name) }
      end
    return nil if matching_types.nil?

    matching_types.each {|found_type| block.call(found_type) }
  end

  # Builds a type from the DSL block and adds it to the repository.
  def record_type(name, props = {}, &definition)
    new_type = build_type(name, props, &definition)
    self << new_type
  end
end
138
+ end
139
+ end
@@ -0,0 +1,50 @@
1
+ require 'ascii-data-tools/record_type/field'
2
+
3
+ module AsciiDataTools
4
+ module RecordType
5
+ module Builder
6
# Builds fixed-length fields from a name and a hash of properties.
module FieldBuilder
  # name       - the field name.
  # properties - :length         the field length,
  #              :constrained_to optional constraint value (skipped when nil),
  #              :normalised     marks the field as normalised when truthy.
  # Returns the configured field.
  def build_field(name, properties = {})
    Field::FixedLengthField.new(name, properties[:length]).tap do |new_field|
      allowed = properties[:constrained_to]
      new_field.should_be_constrained_to(allowed) if !allowed.nil?
      new_field.should_be_normalised if properties[:normalised]
    end
  end
end
14
+
15
# DSL for assembling a record type: the block passed to build_type is
# instance-evaluated, so calls to `field` inside it collect the fields.
module TypeBuilder
  include FieldBuilder

  # type_name  - name of the new type.
  # properties - :family ("csv", "fixed_length" or nil),
  #              :applies_for_filenames_matching (filename constraint),
  #              :divider (value of the :divider meta field).
  # Returns the new type, extended with the module of its family.
  def build_type(type_name, properties = {}, &block)
    new_type = Type.new(type_name, build_fields(&block))
    new_type.extend(determine_type_family_from(properties))

    filename_constraint = properties[:applies_for_filenames_matching]
    new_type.field_with_name(:filename).should_be_constrained_to(filename_constraint)
    new_type.field_with_name(:divider).value = properties[:divider]
    new_type
  end

  # Starts a fresh field collection, runs the block (if any) against this
  # object and returns the collected fields.
  def build_fields(&block)
    @fields = Field::Fields.new
    instance_eval(&block) if block
    @fields
  end

  protected

  # DSL entry used inside the block: appends one field to the collection.
  def field(name, properties = {})
    @fields << build_field(name, properties)
  end

  # Maps the :family property to a type module. A missing family means
  # fixed length; any other value yields nil.
  def determine_type_family_from(properties)
    case properties[:family]
    when "csv"
      CsvType
    when "fixed_length", NilClass
      FixedLengthType
    end
  end
end
48
+ end
49
+ end
50
+ end
@@ -0,0 +1,77 @@
1
+ module AsciiDataTools
2
+ module RecordType
3
+ module Decoder
4
# Decoding behaviour shared by record types. Including modules supply
# able_to_decode_content? and split_into_values; the host object supplies
# fields_by_type.
module RecordDecoder
  # True when the record's :ascii_string is decodable by this type and its
  # meta data (the :filename) is acceptable.
  def able_to_decode?(encoded_record)
    content = encoded_record[:ascii_string]
    able_to_decode_content?(content) && meta_fields_valid?(encoded_record)
  end

  # Splits the record's :ascii_string into values and wraps them in a Record
  # that points back at this type.
  def decode(encoded_record)
    decoded_values = split_into_values(encoded_record[:ascii_string])
    Record::Record.new(self, decoded_values)
  end

  protected

  def able_to_decode_content?(encoded_string)
    raise "Must be implemented in submodule!"
  end

  def split_into_values(ascii_string)
    raise "Must be implemented in submodule!"
  end

  # A record without a :filename always passes; otherwise the filename must
  # be valid input for the :filename meta field.
  def meta_fields_valid?(encoded_record)
    filename = encoded_record[:filename]
    return true if filename.nil?

    filename_field.valid_input?(filename)
  end

  # The :filename meta field, looked up once and then cached.
  def filename_field
    @filename_field ||= fields_by_type[:meta].with_name(:filename)
  end
end
30
+
31
# Decoder that recognises and splits records with a single regular
# expression. Including modules provide the pattern through regexp_string.
module RegexpBasedDecoder
  include RecordDecoder

  protected

  # Truthy when the encoded string matches the type's regexp.
  def able_to_decode_content?(encoded_string)
    pattern = regexp_for_matching_type
    encoded_string =~ pattern
  end

  # Returns the regexp captures, i.e. the match data without the full match.
  def split_into_values(ascii_string)
    match_data = ascii_string.match(regexp_for_matching_type)
    match_data.to_a[1..-1]
  end

  # The multiline regexp built from regexp_string; compiled once and cached.
  def regexp_for_matching_type
    @regexp ||= Regexp.new(regexp_string, Regexp::MULTILINE)
  end

  def regexp_string
    raise "Must be implemented in submodule!"
  end
end
51
+
52
# Regexp-based decoder for fixed-length records.
module FixedLengthRecordDecoder
  include RegexpBasedDecoder

  protected

  # Starts at "\A", lets every content field extend the pattern in turn and
  # closes it with "\z".
  def regexp_string
    pattern = "\\A"
    content_fields.each do |field|
      pattern = field.extend_regexp_string_for_matching(pattern)
    end
    pattern + "\\z"
  end
end
60
+
61
# Regexp-based decoder for divider-separated (CSV-like) records.
module CsvRecordDecoder
  include RegexpBasedDecoder

  protected

  # Builds a pattern with one lazy capture group per content field, the
  # groups separated by the divider and the record ending in a newline.
  #
  # The divider is escaped so that dividers such as "|" or "." are matched
  # literally instead of being read as regexp metacharacters; this keeps
  # decoding in step with the encoder, which joins with the literal divider.
  def regexp_string
    escaped_divider = Regexp.escape(field_with_name(:divider).value)
    "\\A" + "(.*?)" + ((escaped_divider + "(.*?)") * (content_fields.length - 1)) + "\n\\z"
  end
end
69
+
70
# Decoder for unrecognised records: the whole :ascii_string becomes the
# single value of the resulting record.
module UnknownRecordDecoder
  def decode(encoded_record)
    raw_content = encoded_record[:ascii_string]
    Record::Record.new(self, [raw_content])
  end
end
75
+ end
76
+ end
77
+ end
@@ -0,0 +1,17 @@
1
+ module AsciiDataTools
2
+ module RecordType
3
+ module Encoder
4
# Encoder for fixed-length records.
module FixedLengthRecordEncoder
  # Concatenates the values, in order, without any separator being passed.
  def encode(values)
    encoded = values.join
    encoded
  end
end
9
+
10
# Encoder for divider-separated (CSV-like) records.
module CsvRecordEncoder
  # Joins the values with the value of the :divider field and terminates
  # the record with a newline.
  def encode(values)
    divider = field_with_name(:divider).value
    "#{values.join(divider)}\n"
  end
end
15
+ end
16
+ end
17
+ end
@@ -0,0 +1,168 @@
1
+ module AsciiDataTools
2
+ module RecordType
3
+ module Field
4
# Lookup helpers for a collection of fields.
module GreppableFields
  # Returns the first field with the given name, or nil.
  def with_name(field_name)
    find {|candidate| candidate.name == field_name }
  end

  # Returns a new Fields collection with the fields the block selects.
  def fields_with(&block)
    chosen = select(&block)
    Fields.new(chosen)
  end

  # Returns the field at the given position; positions are counted from 1.
  def with_index(index)
    position = index - 1
    self[position]
  end
end
17
+
18
# An array of fields with lookup and reporting helpers.
class Fields < Array
  include GreppableFields

  # Names of all fields, in order.
  def names
    map {|field| field.name }
  end

  def number_of_content_fields
    size
  end

  # Length of the longest field name, or 0 when there are no fields (the
  # original raised NoMethodError on an empty collection). Names are
  # converted with to_s so symbol names are measured like strings.
  # The result is cached after the first call.
  def length_of_longest_field_name
    @length_of_longest_field_name ||= (names.map {|name| name.to_s.length }.max || 0)
  end

  # Comma-separated constraint descriptions of the fields that have one.
  def constraints_description
    reject {|field| field.constraint_description.empty? }.map {|field| field.constraint_description }.join(", ")
  end

  # Marks every field as normalised.
  def should_be_normalised
    each {|field| field.should_be_normalised }
  end

  # Comma-separated names of the normalised fields.
  def names_of_normalised_fields
    select {|field| field.normalised? }.map {|field| field.name }.join(", ")
  end
end
44
+
45
# A named field carrying a constraint and a "normalised" flag.
class Field
  attr_reader :name
  attr_writer :constraint

  # name       - the field name.
  # constraint - the constraint object; defaults to a NoConstraint.
  def initialize(name, constraint = NoConstraint.new)
    @name = name
    @constraint = constraint
    @normalised = false
  end

  def normalised?
    @normalised
  end

  def should_be_normalised
    @normalised = true
  end

  # "<name> <constraint text>", or "" when the constraint has no text.
  def constraint_description
    constraint_text = @constraint.to_s
    return "" if constraint_text.empty?

    "#{name} #{constraint_text}"
  end

  # Replaces the constraint: a Regexp becomes a RegexpConstraint, anything
  # else a OneOfConstraint.
  def should_be_constrained_to(value)
    @constraint = value.is_a?(Regexp) ? RegexpConstraint.new(value) : OneOfConstraint.new(value)
  end

  # Asks the current constraint whether the value satisfies it.
  def valid_input?(value)
    @constraint.satisfied_by?(value)
  end
end
83
+
84
# A field that additionally carries a readable and writable value.
class ConstantField < Field
  attr_reader :value
  attr_writer :value
end
87
+
88
# A field with a fixed length.
class FixedLengthField < Field
  attr_reader :length

  # name       - the field name.
  # length     - the field length.
  # constraint - optional constraint; when omitted a FixedLengthConstraint
  #              for the given length is used.
  def initialize(name, length, constraint = nil)
    effective_constraint = constraint || FixedLengthConstraint.new(length)
    super(name, effective_constraint)
    @length = length
  end

  # Lets the field's constraint extend the regexp string being built.
  def extend_regexp_string_for_matching(regexp_string)
    @constraint.extend_regexp_string_for_matching(regexp_string)
  end
end
100
+
101
# The "no restriction" constraint: it adds nothing to a regexp, accepts
# every input and has an empty description.
class NoConstraint
  # Returns the regexp string unchanged.
  def extend_regexp_string_for_matching(regexp_string)
    regexp_string
  end

  # Always true, whatever the input.
  def satisfied_by?(string)
    true
  end

  def to_s
    ""
  end
end
112
+
113
# Base class for constraints that express themselves as a regexp fragment
# via extend_regexp_string_for_matching.
class Constraint
  # Matches the string against the regexp built from this constraint's own
  # fragment. Returns the result of =~ (match position or nil).
  def satisfied_by?(string)
    pattern = Regexp.new(extend_regexp_string_for_matching(""))
    string =~ pattern
  end
end
118
+
119
# Constraint that accepts any run of exactly `length` characters.
class FixedLengthConstraint < Constraint
  def initialize(length)
    @length = length
  end

  # Appends a capture group matching exactly @length characters.
  def extend_regexp_string_for_matching(regexp_string)
    capture_group = "(.{#{@length}})"
    regexp_string + capture_group
  end

  # This constraint has no textual description.
  def to_s
    ""
  end
end
130
+
131
# Constraint that accepts one of a fixed list of literal values.
class OneOfConstraint < Constraint
  # possible_values - the allowed values; nested arrays are flattened.
  def initialize(*possible_values)
    @possible_values = possible_values.flatten
  end

  # Appends a capture group that alternates between the allowed values.
  # Each value is escaped so regexp metacharacters in it (".", "(", "|",
  # ...) are matched literally; to_s keeps nil and non-string values
  # working as they did with Array#join.
  def extend_regexp_string_for_matching(regexp_string)
    alternatives = @possible_values.map {|value| Regexp.escape(value.to_s) }
    regexp_string + "(#{alternatives.join('|')})"
  end

  # "= value" for a single value, "one of a, b, ..." otherwise.
  def to_s
    if @possible_values.length == 1
      "= #{@possible_values.first}"
    else
      "one of #{@possible_values.join(', ')}"
    end
  end
end
148
+
149
# Constraint that accepts input matching a given regular expression.
class RegexpConstraint < Constraint
  def initialize(regexp_that_must_match)
    @regexp_that_must_match = regexp_that_must_match
  end

  # Appends the source of the regexp to the regexp string being built.
  def extend_regexp_string_for_matching(regexp_string)
    fragment = @regexp_that_must_match.source
    regexp_string + fragment
  end

  # Matches the string directly against the regexp; returns the result of
  # =~ (match position or nil).
  def satisfied_by?(string)
    required_pattern = @regexp_that_must_match
    string =~ required_pattern
  end

  def to_s
    printable = @regexp_that_must_match.inspect
    "=~ #{printable}"
  end
end
166
+ end
167
+ end
168
+ end