masticate 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Guardfile +10 -0
 - data/bin/masticate +13 -16
 - data/lib/masticate/csvify.rb +25 -6
 - data/lib/masticate/mender.rb +37 -23
 - data/lib/masticate/sniffer.rb +15 -15
 - data/lib/masticate/version.rb +1 -1
 - data/lib/masticate.rb +12 -10
 - data/masticate.gemspec +3 -1
 - data/spec/data/broken_psv.txt +4 -4
 - data/spec/data/junk_trailer.txt +8 -0
 - data/spec/lib/csvify_spec.rb +17 -0
 - data/spec/lib/mend_spec.rb +20 -0
 - data/spec/{spec → lib}/sniffer_spec.rb +4 -6
 - metadata +37 -12
 - data/spec/spec/csvify_spec.rb +0 -14
 - data/spec/spec/mend_spec.rb +0 -14
 
    
        data/Guardfile
    ADDED
    
    | 
         @@ -0,0 +1,10 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # A sample Guardfile
         
     | 
| 
      
 2 
     | 
    
         
            +
            # More info at https://github.com/guard/guard#readme
         
     | 
| 
      
 3 
     | 
    
         
            +
             
     | 
| 
      
 4 
     | 
    
         
            +
            guard 'rspec', :version => 2 do
         
     | 
| 
      
 5 
     | 
    
         
            +
              watch(%r{^spec/.+_spec\.rb$})
         
     | 
| 
      
 6 
     | 
    
         
            +
              watch(%r{^lib/(.+)\.rb$})     { |m| "spec/lib/#{m[1]}_spec.rb" }
         
     | 
| 
      
 7 
     | 
    
         
            +
              watch('spec/spec_helper.rb')  { "spec" }
         
     | 
| 
      
 8 
     | 
    
         
            +
             
     | 
| 
      
 9 
     | 
    
         
            +
              watch(%r{^spec/support/(.+)\.rb$})                  { "spec" }
         
     | 
| 
      
 10 
     | 
    
         
            +
            end
         
     | 
    
        data/bin/masticate
    CHANGED
    
    | 
         @@ -1,31 +1,25 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            #!/usr/bin/env ruby
         
     | 
| 
       2 
2 
     | 
    
         | 
| 
       3 
     | 
    
         
            -
             
     | 
| 
      
 3 
     | 
    
         
            +
            require_relative "../lib/masticate"
         
     | 
| 
       4 
4 
     | 
    
         | 
| 
       5 
5 
     | 
    
         
             
            command, filename = ARGV
         
     | 
| 
       6 
6 
     | 
    
         | 
| 
       7 
7 
     | 
    
         
             
            case ARGV.shift
         
     | 
| 
       8 
8 
     | 
    
         
             
            when 'sniff'
         
     | 
| 
       9 
     | 
    
         
            -
               
     | 
| 
       10 
     | 
    
         
            -
              results = Masticate.sniff(file)
         
     | 
| 
      
 9 
     | 
    
         
            +
              results = Masticate.sniff(filename)
         
     | 
| 
       11 
10 
     | 
    
         
             
              col_sep = results[:col_sep]
         
     | 
| 
       12 
11 
     | 
    
         
             
              col_sep = "TAB" if col_sep == "\t"
         
     | 
| 
       13 
     | 
    
         
            -
              file.close
         
     | 
| 
       14 
12 
     | 
    
         
             
              $stderr.puts <<-EOT
         
     | 
| 
       15 
13 
     | 
    
         
             
            Processing complete.
         
     | 
| 
       16 
14 
     | 
    
         
             
              Input delimiter: #{col_sep}
         
     | 
| 
       17 
     | 
    
         
            -
              Field counts: #{results[:field_counts]. 
     | 
| 
      
 15 
     | 
    
         
            +
              Field counts: #{results[:field_counts].inspect}
         
     | 
| 
       18 
16 
     | 
    
         
             
            EOT
         
     | 
| 
       19 
17 
     | 
    
         | 
| 
       20 
18 
     | 
    
         
             
            when 'mend'
         
     | 
| 
       21 
     | 
    
         
            -
               
     | 
| 
       22 
     | 
    
         
            -
              metadata = Masticate.sniff(file)
         
     | 
| 
      
 19 
     | 
    
         
            +
              metadata = Masticate.sniff(filename)
         
     | 
| 
       23 
20 
     | 
    
         
             
              col_sep = metadata[:col_sep]
         
     | 
| 
       24 
21 
     | 
    
         
             
              col_sep = "TAB" if col_sep == "\t"
         
     | 
| 
       25 
     | 
    
         
            -
               
     | 
| 
       26 
     | 
    
         
            -
              file = File.open(filename)
         
     | 
| 
       27 
     | 
    
         
            -
              results = Masticate.mend(file, metadata.merge(:output => $stdout))
         
     | 
| 
       28 
     | 
    
         
            -
              file.close
         
     | 
| 
      
 22 
     | 
    
         
            +
              results = Masticate.mend(filename, metadata)
         
     | 
| 
       29 
23 
     | 
    
         
             
              $stderr.puts <<-EOT
         
     | 
| 
       30 
24 
     | 
    
         
             
            Processing complete.
         
     | 
| 
       31 
25 
     | 
    
         
             
              Input delimiter: #{col_sep}
         
     | 
| 
         @@ -34,11 +28,14 @@ Processing complete. 
     | 
|
| 
       34 
28 
     | 
    
         
             
            EOT
         
     | 
| 
       35 
29 
     | 
    
         | 
| 
       36 
30 
     | 
    
         
             
            when 'csvify'
         
     | 
| 
       37 
     | 
    
         
            -
               
     | 
| 
       38 
     | 
    
         
            -
               
     | 
| 
       39 
     | 
    
         
            -
               
     | 
| 
       40 
     | 
    
         
            -
             
     | 
| 
       41 
     | 
    
         
            -
               
     | 
| 
      
 31 
     | 
    
         
            +
              metadata = Masticate.sniff(filename)
         
     | 
| 
      
 32 
     | 
    
         
            +
              results = Masticate.csvify(filename, metadata)
         
     | 
| 
      
 33 
     | 
    
         
            +
              $stderr.puts <<-EOT
         
     | 
| 
      
 34 
     | 
    
         
            +
            Processing complete.
         
     | 
| 
      
 35 
     | 
    
         
            +
              Input delimiter: #{metadata[:col_sep]}
         
     | 
| 
      
 36 
     | 
    
         
            +
              Lines in input: #{results[:input_count]}
         
     | 
| 
      
 37 
     | 
    
         
            +
              Lines in output: #{results[:output_count]}
         
     | 
| 
      
 38 
     | 
    
         
            +
            EOT
         
     | 
| 
       42 
39 
     | 
    
         | 
| 
       43 
40 
     | 
    
         
             
            else
         
     | 
| 
       44 
41 
     | 
    
         
             
              raise "unknown command #{command}"
         
     | 
    
        data/lib/masticate/csvify.rb
    CHANGED
    
    | 
         @@ -2,19 +2,38 @@ 
     | 
|
| 
       2 
2 
     | 
    
         
             
            require "csv"
         
     | 
| 
       3 
3 
     | 
    
         | 
| 
       4 
4 
     | 
    
         
             
            class Masticate::Csvify
         
     | 
| 
       5 
     | 
    
         
            -
              attr_reader : 
     | 
| 
      
 5 
     | 
    
         
            +
              attr_reader :input
         
     | 
| 
       6 
6 
     | 
    
         | 
| 
       7 
     | 
    
         
            -
              def initialize( 
     | 
| 
       8 
     | 
    
         
            -
                @ 
     | 
| 
      
 7 
     | 
    
         
            +
              def initialize(filename)
         
     | 
| 
      
 8 
     | 
    
         
            +
                @input = File.open(filename)
         
     | 
| 
       9 
9 
     | 
    
         
             
              end
         
     | 
| 
       10 
10 
     | 
    
         | 
| 
       11 
11 
     | 
    
         
             
              def csvify(opts)
         
     | 
| 
      
 12 
     | 
    
         
            +
                @output = opts[:output] ? File.open(opts[:output], "w") : $stdout
         
     | 
| 
       12 
13 
     | 
    
         
             
                csv_options = {}
         
     | 
| 
       13 
14 
     | 
    
         
             
                csv_options[:col_sep] = opts[:col_sep] if opts[:col_sep]
         
     | 
| 
       14 
15 
     | 
    
         
             
                csv_options[:quote_char] = opts[:quote_char] || opts[:col_sep] if opts[:quote_char] || opts[:col_sep]
         
     | 
| 
       15 
     | 
    
         
            -
             
     | 
| 
       16 
     | 
    
         
            -
                 
     | 
| 
       17 
     | 
    
         
            -
             
     | 
| 
      
 16 
     | 
    
         
            +
             
     | 
| 
      
 17 
     | 
    
         
            +
                input_count = @output_count = 0
         
     | 
| 
      
 18 
     | 
    
         
            +
                CSV.foreach(input, csv_options) do |row|
         
     | 
| 
      
 19 
     | 
    
         
            +
                  input_count += 1
         
     | 
| 
      
 20 
     | 
    
         
            +
                  emit(row.to_csv)
         
     | 
| 
      
 21 
     | 
    
         
            +
                end
         
     | 
| 
      
 22 
     | 
    
         
            +
                @output.close if opts[:output]
         
     | 
| 
      
 23 
     | 
    
         
            +
                @input.close
         
     | 
| 
      
 24 
     | 
    
         
            +
                {
         
     | 
| 
      
 25 
     | 
    
         
            +
                  :input_count => input_count,
         
     | 
| 
      
 26 
     | 
    
         
            +
                  :output_count => @output_count
         
     | 
| 
      
 27 
     | 
    
         
            +
                }
         
     | 
| 
      
 28 
     | 
    
         
            +
              end
         
     | 
| 
      
 29 
     | 
    
         
            +
             
     | 
| 
      
 30 
     | 
    
         
            +
              def emit(line)
         
     | 
| 
      
 31 
     | 
    
         
            +
                @output_count += 1
         
     | 
| 
      
 32 
     | 
    
         
            +
                begin
         
     | 
| 
      
 33 
     | 
    
         
            +
                  @output.puts line
         
     | 
| 
      
 34 
     | 
    
         
            +
                rescue Errno::EPIPE
         
     | 
| 
      
 35 
     | 
    
         
            +
                  # output was closed, e.g. ran piped into `head`
         
     | 
| 
      
 36 
     | 
    
         
            +
                  # silently ignore this condition, it's not fatal and doesn't need a warning
         
     | 
| 
       18 
37 
     | 
    
         
             
                end
         
     | 
| 
       19 
38 
     | 
    
         
             
              end
         
     | 
| 
       20 
39 
     | 
    
         
             
            end
         
     | 
    
        data/lib/masticate/mender.rb
    CHANGED
    
    | 
         @@ -4,46 +4,60 @@ 
     | 
|
| 
       4 
4 
     | 
    
         
             
            # (due to a newline embedded in a field).  Glue those two lines into a single line in the output.
         
     | 
| 
       5 
5 
     | 
    
         | 
| 
       6 
6 
     | 
    
         
             
            class Masticate::Mender
         
     | 
| 
       7 
     | 
    
         
            -
              attr_reader : 
     | 
| 
      
 7 
     | 
    
         
            +
              attr_reader :input
         
     | 
| 
       8 
8 
     | 
    
         | 
| 
       9 
     | 
    
         
            -
              def initialize( 
     | 
| 
       10 
     | 
    
         
            -
                @ 
     | 
| 
      
 9 
     | 
    
         
            +
              def initialize(filename)
         
     | 
| 
      
 10 
     | 
    
         
            +
                @input = open(filename)
         
     | 
| 
       11 
11 
     | 
    
         
             
              end
         
     | 
| 
       12 
12 
     | 
    
         | 
| 
       13 
13 
     | 
    
         
             
              def mend(opts)
         
     | 
| 
       14 
     | 
    
         
            -
                output = opts[:output]
         
     | 
| 
      
 14 
     | 
    
         
            +
                @output = opts[:output] ? File.open(opts[:output], "w") : $stdout
         
     | 
| 
       15 
15 
     | 
    
         
             
                col_sep = opts[:col_sep]
         
     | 
| 
       16 
16 
     | 
    
         | 
| 
       17 
     | 
    
         
            -
                 
     | 
| 
       18 
     | 
    
         
            -
                @input_count = output_count = 0
         
     | 
| 
      
 17 
     | 
    
         
            +
                expected_delim_count = nil
         
     | 
| 
      
 18 
     | 
    
         
            +
                @input_count = @output_count = 0
         
     | 
| 
       19 
19 
     | 
    
         
             
                while (line = get) do
         
     | 
| 
       20 
     | 
    
         
            -
                   
     | 
| 
       21 
     | 
    
         
            -
                     
     | 
| 
       22 
     | 
    
         
            -
             
     | 
| 
       23 
     | 
    
         
            -
             
     | 
| 
       24 
     | 
    
         
            -
                     
     | 
| 
       25 
     | 
    
         
            -
             
     | 
| 
       26 
     | 
    
         
            -
                       
     | 
| 
       27 
     | 
    
         
            -
             
     | 
| 
       28 
     | 
    
         
            -
                         
     | 
| 
       29 
     | 
    
         
            -
             
     | 
| 
       30 
     | 
    
         
            -
             
     | 
| 
       31 
     | 
    
         
            -
                         
     | 
| 
      
 20 
     | 
    
         
            +
                  unless line =~ /^\s*$/
         
     | 
| 
      
 21 
     | 
    
         
            +
                    if !expected_delim_count
         
     | 
| 
      
 22 
     | 
    
         
            +
                      # trust the first row
         
     | 
| 
      
 23 
     | 
    
         
            +
                      expected_delim_count = line.count(col_sep)
         
     | 
| 
      
 24 
     | 
    
         
            +
                    else
         
     | 
| 
      
 25 
     | 
    
         
            +
                      running_count = line.count(col_sep)
         
     | 
| 
      
 26 
     | 
    
         
            +
                      while !input.eof? && running_count < expected_delim_count do
         
     | 
| 
      
 27 
     | 
    
         
            +
                        nextbit = get
         
     | 
| 
      
 28 
     | 
    
         
            +
                        if nextbit
         
     | 
| 
      
 29 
     | 
    
         
            +
                          line = line + ' ' + nextbit
         
     | 
| 
      
 30 
     | 
    
         
            +
                          running_count = line.count(col_sep)
         
     | 
| 
      
 31 
     | 
    
         
            +
                        end
         
     | 
| 
       32 
32 
     | 
    
         
             
                      end
         
     | 
| 
       33 
33 
     | 
    
         
             
                    end
         
     | 
| 
      
 34 
     | 
    
         
            +
                    if line.count(col_sep) > 2
         
     | 
| 
      
 35 
     | 
    
         
            +
                      emit(line)
         
     | 
| 
      
 36 
     | 
    
         
            +
                    end
         
     | 
| 
       34 
37 
     | 
    
         
             
                  end
         
     | 
| 
       35 
     | 
    
         
            -
                  output_count += 1
         
     | 
| 
       36 
     | 
    
         
            -
                  output << line
         
     | 
| 
       37 
38 
     | 
    
         
             
                end
         
     | 
| 
       38 
39 
     | 
    
         | 
| 
      
 40 
     | 
    
         
            +
                @input.close
         
     | 
| 
      
 41 
     | 
    
         
            +
                @output.close if opts[:output]
         
     | 
| 
       39 
42 
     | 
    
         
             
                {
         
     | 
| 
       40 
43 
     | 
    
         
             
                  :input_records => @input_count,
         
     | 
| 
       41 
     | 
    
         
            -
                  :output_records => output_count
         
     | 
| 
      
 44 
     | 
    
         
            +
                  :output_records => @output_count
         
     | 
| 
       42 
45 
     | 
    
         
             
                }
         
     | 
| 
       43 
46 
     | 
    
         
             
              end
         
     | 
| 
       44 
47 
     | 
    
         | 
| 
       45 
48 
     | 
    
         
             
              def get
         
     | 
| 
       46 
     | 
    
         
            -
                 
     | 
| 
       47 
     | 
    
         
            -
                 
     | 
| 
      
 49 
     | 
    
         
            +
                line = input.gets
         
     | 
| 
      
 50 
     | 
    
         
            +
                @input_count += 1
         
     | 
| 
      
 51 
     | 
    
         
            +
                line && line.chomp
         
     | 
| 
      
 52 
     | 
    
         
            +
              end
         
     | 
| 
      
 53 
     | 
    
         
            +
             
     | 
| 
      
 54 
     | 
    
         
            +
              def emit(line)
         
     | 
| 
      
 55 
     | 
    
         
            +
                @output_count += 1
         
     | 
| 
      
 56 
     | 
    
         
            +
                begin
         
     | 
| 
      
 57 
     | 
    
         
            +
                  @output.puts line
         
     | 
| 
      
 58 
     | 
    
         
            +
                rescue Errno::EPIPE
         
     | 
| 
      
 59 
     | 
    
         
            +
                  # output was closed, e.g. ran piped into `head`
         
     | 
| 
      
 60 
     | 
    
         
            +
                  # silently ignore this condition, it's not fatal and doesn't need a warning
         
     | 
| 
      
 61 
     | 
    
         
            +
                end
         
     | 
| 
       48 
62 
     | 
    
         
             
              end
         
     | 
| 
       49 
63 
     | 
    
         
             
            end
         
     | 
    
        data/lib/masticate/sniffer.rb
    CHANGED
    
    | 
         @@ -1,32 +1,29 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            class Masticate::Sniffer
         
     | 
| 
       2 
     | 
    
         
            -
              attr_reader : 
     | 
| 
       3 
     | 
    
         
            -
              attr_reader :col_sep
         
     | 
| 
      
 2 
     | 
    
         
            +
              attr_reader :col_sep, :stats
         
     | 
| 
       4 
3 
     | 
    
         | 
| 
       5 
4 
     | 
    
         
             
              CandidateDelimiters = [',', '|', "\t"]
         
     | 
| 
       6 
5 
     | 
    
         | 
| 
       7 
     | 
    
         
            -
              def initialize( 
     | 
| 
       8 
     | 
    
         
            -
                @ 
     | 
| 
       9 
     | 
    
         
            -
              end
         
     | 
| 
       10 
     | 
    
         
            -
             
     | 
| 
       11 
     | 
    
         
            -
              def self.sniff(file)
         
     | 
| 
       12 
     | 
    
         
            -
                sniffer = new(file)
         
     | 
| 
       13 
     | 
    
         
            -
                sniffer.sniff
         
     | 
| 
      
 6 
     | 
    
         
            +
              def initialize(filename)
         
     | 
| 
      
 7 
     | 
    
         
            +
                @filename = filename
         
     | 
| 
       14 
8 
     | 
    
         
             
              end
         
     | 
| 
       15 
9 
     | 
    
         | 
| 
       16 
10 
     | 
    
         
             
              def sniff
         
     | 
| 
       17 
11 
     | 
    
         
             
                @col_sep = find_col_sep
         
     | 
| 
      
 12 
     | 
    
         
            +
                @stats = stats
         
     | 
| 
       18 
13 
     | 
    
         
             
                {
         
     | 
| 
       19 
     | 
    
         
            -
                  :col_sep => col_sep,
         
     | 
| 
       20 
     | 
    
         
            -
                  :field_counts => stats
         
     | 
| 
      
 14 
     | 
    
         
            +
                  :col_sep => @col_sep,
         
     | 
| 
      
 15 
     | 
    
         
            +
                  :field_counts => @stats,
         
     | 
| 
      
 16 
     | 
    
         
            +
                  :line1 => @line1
         
     | 
| 
       21 
17 
     | 
    
         
             
                }
         
     | 
| 
       22 
18 
     | 
    
         
             
              end
         
     | 
| 
       23 
19 
     | 
    
         | 
| 
       24 
20 
     | 
    
         
             
              def find_col_sep
         
     | 
| 
       25 
     | 
    
         
            -
                 
     | 
| 
      
 21 
     | 
    
         
            +
                input = open(@filename)
         
     | 
| 
      
 22 
     | 
    
         
            +
                @line1 = input.lines.first
         
     | 
| 
       26 
23 
     | 
    
         
             
                delimcounts = CandidateDelimiters.each_with_object({}) do |delim,h|
         
     | 
| 
       27 
     | 
    
         
            -
                  h[delim] = consider_delim(line1, delim)
         
     | 
| 
      
 24 
     | 
    
         
            +
                  h[delim] = consider_delim(@line1, delim)
         
     | 
| 
       28 
25 
     | 
    
         
             
                end
         
     | 
| 
       29 
     | 
    
         
            -
                 
     | 
| 
      
 26 
     | 
    
         
            +
                input.close
         
     | 
| 
       30 
27 
     | 
    
         
             
                delimcounts.sort_by{|h,v| -v}.first.first
         
     | 
| 
       31 
28 
     | 
    
         
             
              end
         
     | 
| 
       32 
29 
     | 
    
         | 
| 
         @@ -35,6 +32,9 @@ class Masticate::Sniffer 
     | 
|
| 
       35 
32 
     | 
    
         
             
              end
         
     | 
| 
       36 
33 
     | 
    
         | 
| 
       37 
34 
     | 
    
         
             
              def stats
         
     | 
| 
       38 
     | 
    
         
            -
                 
     | 
| 
      
 35 
     | 
    
         
            +
                input = open(@filename)
         
     | 
| 
      
 36 
     | 
    
         
            +
                counts = input.lines.each_with_object(Hash.new(0)) {|line, counts| counts[line.split(col_sep).count] += 1}
         
     | 
| 
      
 37 
     | 
    
         
            +
                input.close
         
     | 
| 
      
 38 
     | 
    
         
            +
                counts
         
     | 
| 
       39 
39 
     | 
    
         
             
              end
         
     | 
| 
       40 
40 
     | 
    
         
             
            end
         
     | 
    
        data/lib/masticate/version.rb
    CHANGED
    
    
    
        data/lib/masticate.rb
    CHANGED
    
    | 
         @@ -1,18 +1,20 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            require " 
     | 
| 
       2 
     | 
    
         
            -
             
     | 
| 
       3 
     | 
    
         
            -
             
     | 
| 
       4 
     | 
    
         
            -
             
     | 
| 
      
 1 
     | 
    
         
            +
            require "open-uri"
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            require_relative "masticate/version"
         
     | 
| 
      
 4 
     | 
    
         
            +
            require_relative "masticate/sniffer"
         
     | 
| 
      
 5 
     | 
    
         
            +
            require_relative "masticate/mender"
         
     | 
| 
      
 6 
     | 
    
         
            +
            require_relative "masticate/csvify"
         
     | 
| 
       5 
7 
     | 
    
         | 
| 
       6 
8 
     | 
    
         
             
            module Masticate
         
     | 
| 
       7 
     | 
    
         
            -
              def self.sniff( 
     | 
| 
       8 
     | 
    
         
            -
                Sniffer.new( 
     | 
| 
      
 9 
     | 
    
         
            +
              def self.sniff(filename)
         
     | 
| 
      
 10 
     | 
    
         
            +
                Sniffer.new(filename).sniff
         
     | 
| 
       9 
11 
     | 
    
         
             
              end
         
     | 
| 
       10 
12 
     | 
    
         | 
| 
       11 
     | 
    
         
            -
              def self.mend( 
     | 
| 
       12 
     | 
    
         
            -
                Mender.new( 
     | 
| 
      
 13 
     | 
    
         
            +
              def self.mend(filename, opts)
         
     | 
| 
      
 14 
     | 
    
         
            +
                Mender.new(filename).mend(opts)
         
     | 
| 
       13 
15 
     | 
    
         
             
              end
         
     | 
| 
       14 
16 
     | 
    
         | 
| 
       15 
     | 
    
         
            -
              def self.csvify( 
     | 
| 
       16 
     | 
    
         
            -
                Csvify.new( 
     | 
| 
      
 17 
     | 
    
         
            +
              def self.csvify(filename, opts)
         
     | 
| 
      
 18 
     | 
    
         
            +
                Csvify.new(filename).csvify(opts)
         
     | 
| 
       17 
19 
     | 
    
         
             
              end
         
     | 
| 
       18 
20 
     | 
    
         
             
            end
         
     | 
    
        data/masticate.gemspec
    CHANGED
    
    | 
         @@ -16,5 +16,7 @@ Gem::Specification.new do |gem| 
     | 
|
| 
       16 
16 
     | 
    
         
             
              gem.require_paths = ["lib"]
         
     | 
| 
       17 
17 
     | 
    
         
             
              gem.version       = Masticate::VERSION
         
     | 
| 
       18 
18 
     | 
    
         | 
| 
       19 
     | 
    
         
            -
              gem.add_development_dependency "rspec"
         
     | 
| 
      
 19 
     | 
    
         
            +
              gem.add_development_dependency "rspec", "~> 2.9.0"
         
     | 
| 
      
 20 
     | 
    
         
            +
              gem.add_development_dependency "guard-rspec", "~> 0.7.0"
         
     | 
| 
      
 21 
     | 
    
         
            +
              gem.add_development_dependency "ruby_gntp", "~> 0.3.4"
         
     | 
| 
       20 
22 
     | 
    
         
             
            end
         
     | 
    
        data/spec/data/broken_psv.txt
    CHANGED
    
    | 
         @@ -1,6 +1,6 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            COL1|COL 2|Col 3   |col-4|   col5  |col6
         
     | 
| 
       2 
     | 
    
         
            -
             
     | 
| 
       3 
     | 
    
         
            -
             
     | 
| 
       4 
     | 
    
         
            -
             
     | 
| 
      
 2 
     | 
    
         
            +
            data1|  data |data  |d a t a|data|data
         
     | 
| 
      
 3 
     | 
    
         
            +
            data2|  data |data  |d a t a|data|data
         
     | 
| 
      
 4 
     | 
    
         
            +
            data3|  data |this long row
         
     | 
| 
       5 
5 
     | 
    
         
             
            is split across lines|d a t a|data|data
         
     | 
| 
       6 
     | 
    
         
            -
             
     | 
| 
      
 6 
     | 
    
         
            +
            data4|  data |data  |d a t a|data|data
         
     | 
| 
         @@ -0,0 +1,17 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # spec for file-sniffing functions
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            require "spec_helper"
         
     | 
| 
      
 4 
     | 
    
         
            +
            require "tempfile"
         
     | 
| 
      
 5 
     | 
    
         
            +
             
     | 
| 
      
 6 
     | 
    
         
            +
            describe "csvification" do
         
     | 
| 
      
 7 
     | 
    
         
            +
              it "should convert pipes to standard commas" do
         
     | 
| 
      
 8 
     | 
    
         
            +
                filename = File.dirname(__FILE__) + "/../data/pipe_data.txt"
         
     | 
| 
      
 9 
     | 
    
         
            +
                tmp = Tempfile.new('csvify')
         
     | 
| 
      
 10 
     | 
    
         
            +
                results = Masticate.csvify(filename, :output => tmp, :col_sep => '|')
         
     | 
| 
      
 11 
     | 
    
         
            +
                output = File.read(tmp)
         
     | 
| 
      
 12 
     | 
    
         
            +
                tmp.unlink
         
     | 
| 
      
 13 
     | 
    
         
            +
                output.lines.count.should == 5
         
     | 
| 
      
 14 
     | 
    
         
            +
                results[:input_count].should == 5
         
     | 
| 
      
 15 
     | 
    
         
            +
                results[:output_count].should == 5
         
     | 
| 
      
 16 
     | 
    
         
            +
              end
         
     | 
| 
      
 17 
     | 
    
         
            +
            end
         
     | 
| 
         @@ -0,0 +1,20 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # spec for file-sniffing functions
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            require "spec_helper"
         
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
      
 5 
     | 
    
         
            +
            describe "mending" do
         
     | 
| 
      
 6 
     | 
    
         
            +
              it "should merge lines when delimiter counts don't match'" do
         
     | 
| 
      
 7 
     | 
    
         
            +
                filename = File.dirname(__FILE__) + "/../data/broken_psv.txt"
         
     | 
| 
      
 8 
     | 
    
         
            +
                results = Masticate.mend(filename, :col_sep => '|', :output => "/dev/null")
         
     | 
| 
      
 9 
     | 
    
         
            +
                results[:input_records].should == 7
         
     | 
| 
      
 10 
     | 
    
         
            +
                results[:output_records].should == 5
         
     | 
| 
      
 11 
     | 
    
         
            +
              end
         
     | 
| 
      
 12 
     | 
    
         
            +
             
     | 
| 
      
 13 
     | 
    
         
            +
              it "should strip trailer records" do
         
     | 
| 
      
 14 
     | 
    
         
            +
                filename = File.dirname(__FILE__) + "/../data/junk_trailer.txt"
         
     | 
| 
      
 15 
     | 
    
         
            +
                metadata = Masticate.sniff(filename)
         
     | 
| 
      
 16 
     | 
    
         
            +
                results = Masticate.mend(filename, metadata.merge(:output => "/dev/null"))
         
     | 
| 
      
 17 
     | 
    
         
            +
                results[:input_records].should == 9
         
     | 
| 
      
 18 
     | 
    
         
            +
                results[:output_records].should == 5
         
     | 
| 
      
 19 
     | 
    
         
            +
              end
         
     | 
| 
      
 20 
     | 
    
         
            +
            end
         
     | 
| 
         @@ -5,17 +5,15 @@ require "spec_helper" 
     | 
|
| 
       5 
5 
     | 
    
         
             
            describe "delimiter sniffing" do
         
     | 
| 
       6 
6 
     | 
    
         
             
              it "should find tab delimiter" do
         
     | 
| 
       7 
7 
     | 
    
         
             
                filename = File.dirname(__FILE__) + "/../data/tabbed_data.txt"
         
     | 
| 
       8 
     | 
    
         
            -
                 
     | 
| 
       9 
     | 
    
         
            -
                results = Masticate.sniff(file)
         
     | 
| 
      
 8 
     | 
    
         
            +
                results = Masticate.sniff(filename)
         
     | 
| 
       10 
9 
     | 
    
         
             
                results[:col_sep].should == "\t"
         
     | 
| 
       11 
     | 
    
         
            -
                results[:field_counts].should ==  
     | 
| 
      
 10 
     | 
    
         
            +
                results[:field_counts].should == {6 => 5}
         
     | 
| 
       12 
11 
     | 
    
         
             
              end
         
     | 
| 
       13 
12 
     | 
    
         | 
| 
       14 
13 
     | 
    
         
             
              it "should find pipe delimiter" do
         
     | 
| 
       15 
14 
     | 
    
         
             
                filename = File.dirname(__FILE__) + "/../data/pipe_data.txt"
         
     | 
| 
       16 
     | 
    
         
            -
                 
     | 
| 
       17 
     | 
    
         
            -
                results = Masticate.sniff(file)
         
     | 
| 
      
 15 
     | 
    
         
            +
                results = Masticate.sniff(filename)
         
     | 
| 
       18 
16 
     | 
    
         
             
                results[:col_sep].should == '|'
         
     | 
| 
       19 
     | 
    
         
            -
                results[:field_counts].should ==  
     | 
| 
      
 17 
     | 
    
         
            +
                results[:field_counts].should == {6 => 5}
         
     | 
| 
       20 
18 
     | 
    
         
             
              end
         
     | 
| 
       21 
19 
     | 
    
         
             
            end
         
     | 
    
        metadata
    CHANGED
    
    | 
         @@ -1,7 +1,7 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            --- !ruby/object:Gem::Specification
         
     | 
| 
       2 
2 
     | 
    
         
             
            name: masticate
         
     | 
| 
       3 
3 
     | 
    
         
             
            version: !ruby/object:Gem::Version
         
     | 
| 
       4 
     | 
    
         
            -
              version: 0.0.3
     | 
| 
      
 4 
     | 
    
         
            +
              version: 0.0.4
         
     | 
| 
       5 
5 
     | 
    
         
             
              prerelease: 
         
     | 
| 
       6 
6 
     | 
    
         
             
            platform: ruby
         
     | 
| 
       7 
7 
     | 
    
         
             
            authors:
         
     | 
| 
         @@ -9,19 +9,41 @@ authors: 
     | 
|
| 
       9 
9 
     | 
    
         
             
            autorequire: 
         
     | 
| 
       10 
10 
     | 
    
         
             
            bindir: bin
         
     | 
| 
       11 
11 
     | 
    
         
             
            cert_chain: []
         
     | 
| 
       12 
     | 
    
         
            -
            date: 2012-04- 
     | 
| 
      
 12 
     | 
    
         
            +
            date: 2012-04-04 00:00:00.000000000 Z
         
     | 
| 
       13 
13 
     | 
    
         
             
            dependencies:
         
     | 
| 
       14 
14 
     | 
    
         
             
            - !ruby/object:Gem::Dependency
         
     | 
| 
       15 
15 
     | 
    
         
             
              name: rspec
         
     | 
| 
       16 
     | 
    
         
            -
              requirement: & 
     | 
| 
      
 16 
     | 
    
         
            +
              requirement: &2153254280 !ruby/object:Gem::Requirement
         
     | 
| 
       17 
17 
     | 
    
         
             
                none: false
         
     | 
| 
       18 
18 
     | 
    
         
             
                requirements:
         
     | 
| 
       19 
     | 
    
         
            -
                - -  
     | 
| 
      
 19 
     | 
    
         
            +
                - - ~>
         
     | 
| 
       20 
20 
     | 
    
         
             
                  - !ruby/object:Gem::Version
         
     | 
| 
       21 
     | 
    
         
            -
                    version:  
     | 
| 
      
 21 
     | 
    
         
            +
                    version: 2.9.0
         
     | 
| 
       22 
22 
     | 
    
         
             
              type: :development
         
     | 
| 
       23 
23 
     | 
    
         
             
              prerelease: false
         
     | 
| 
       24 
     | 
    
         
            -
              version_requirements: * 
     | 
| 
      
 24 
     | 
    
         
            +
              version_requirements: *2153254280
         
     | 
| 
      
 25 
     | 
    
         
            +
            - !ruby/object:Gem::Dependency
         
     | 
| 
      
 26 
     | 
    
         
            +
              name: guard-rspec
         
     | 
| 
      
 27 
     | 
    
         
            +
              requirement: &2153246900 !ruby/object:Gem::Requirement
         
     | 
| 
      
 28 
     | 
    
         
            +
                none: false
         
     | 
| 
      
 29 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 30 
     | 
    
         
            +
                - - ~>
         
     | 
| 
      
 31 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 32 
     | 
    
         
            +
                    version: 0.7.0
         
     | 
| 
      
 33 
     | 
    
         
            +
              type: :development
         
     | 
| 
      
 34 
     | 
    
         
            +
              prerelease: false
         
     | 
| 
      
 35 
     | 
    
         
            +
              version_requirements: *2153246900
         
     | 
| 
      
 36 
     | 
    
         
            +
            - !ruby/object:Gem::Dependency
         
     | 
| 
      
 37 
     | 
    
         
            +
              name: ruby_gntp
         
     | 
| 
      
 38 
     | 
    
         
            +
              requirement: &2153246180 !ruby/object:Gem::Requirement
         
     | 
| 
      
 39 
     | 
    
         
            +
                none: false
         
     | 
| 
      
 40 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 41 
     | 
    
         
            +
                - - ~>
         
     | 
| 
      
 42 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 43 
     | 
    
         
            +
                    version: 0.3.4
         
     | 
| 
      
 44 
     | 
    
         
            +
              type: :development
         
     | 
| 
      
 45 
     | 
    
         
            +
              prerelease: false
         
     | 
| 
      
 46 
     | 
    
         
            +
              version_requirements: *2153246180
         
     | 
| 
       25 
47 
     | 
    
         
             
            description: Data file crunching
         
     | 
| 
       26 
48 
     | 
    
         
             
            email:
         
     | 
| 
       27 
49 
     | 
    
         
             
            - jmay@pobox.com
         
     | 
| 
         @@ -32,6 +54,7 @@ extra_rdoc_files: [] 
     | 
|
| 
       32 
54 
     | 
    
         
             
            files:
         
     | 
| 
       33 
55 
     | 
    
         
             
            - .gitignore
         
     | 
| 
       34 
56 
     | 
    
         
             
            - Gemfile
         
     | 
| 
      
 57 
     | 
    
         
            +
            - Guardfile
         
     | 
| 
       35 
58 
     | 
    
         
             
            - LICENSE
         
     | 
| 
       36 
59 
     | 
    
         
             
            - README.md
         
     | 
| 
       37 
60 
     | 
    
         
             
            - Rakefile
         
     | 
| 
         @@ -43,11 +66,12 @@ files: 
     | 
|
| 
       43 
66 
     | 
    
         
             
            - lib/masticate/version.rb
         
     | 
| 
       44 
67 
     | 
    
         
             
            - masticate.gemspec
         
     | 
| 
       45 
68 
     | 
    
         
             
            - spec/data/broken_psv.txt
         
     | 
| 
      
 69 
     | 
    
         
            +
            - spec/data/junk_trailer.txt
         
     | 
| 
       46 
70 
     | 
    
         
             
            - spec/data/pipe_data.txt
         
     | 
| 
       47 
71 
     | 
    
         
             
            - spec/data/tabbed_data.txt
         
     | 
| 
       48 
     | 
    
         
            -
            - spec/spec/csvify_spec.rb
     | 
| 
       49 
     | 
    
         
            -
            - spec/spec/mend_spec.rb
     | 
| 
       50 
     | 
    
         
            -
            - spec/spec/sniffer_spec.rb
     | 
| 
      
 72 
     | 
    
         
            +
            - spec/lib/csvify_spec.rb
         
     | 
| 
      
 73 
     | 
    
         
            +
            - spec/lib/mend_spec.rb
         
     | 
| 
      
 74 
     | 
    
         
            +
            - spec/lib/sniffer_spec.rb
         
     | 
| 
       51 
75 
     | 
    
         
             
            - spec/spec_helper.rb
         
     | 
| 
       52 
76 
     | 
    
         
             
            homepage: ''
         
     | 
| 
       53 
77 
     | 
    
         
             
            licenses: []
         
     | 
| 
         @@ -75,10 +99,11 @@ specification_version: 3 
     | 
|
| 
       75 
99 
     | 
    
         
             
            summary: Utility functions for parsing incoming text data files.
         
     | 
| 
       76 
100 
     | 
    
         
             
            test_files:
         
     | 
| 
       77 
101 
     | 
    
         
             
            - spec/data/broken_psv.txt
         
     | 
| 
      
 102 
     | 
    
         
            +
            - spec/data/junk_trailer.txt
         
     | 
| 
       78 
103 
     | 
    
         
             
            - spec/data/pipe_data.txt
         
     | 
| 
       79 
104 
     | 
    
         
             
            - spec/data/tabbed_data.txt
         
     | 
| 
       80 
     | 
    
         
            -
            - spec/spec/csvify_spec.rb
     | 
| 
       81 
     | 
    
         
            -
            - spec/spec/mend_spec.rb
     | 
| 
       82 
     | 
    
         
            -
            - spec/spec/sniffer_spec.rb
     | 
| 
      
 105 
     | 
    
         
            +
            - spec/lib/csvify_spec.rb
         
     | 
| 
      
 106 
     | 
    
         
            +
            - spec/lib/mend_spec.rb
         
     | 
| 
      
 107 
     | 
    
         
            +
            - spec/lib/sniffer_spec.rb
         
     | 
| 
       83 
108 
     | 
    
         
             
            - spec/spec_helper.rb
         
     | 
| 
       84 
109 
     | 
    
         
             
            has_rdoc: 
         
     | 
    
        data/spec/spec/csvify_spec.rb
    DELETED
    
    | 
         @@ -1,14 +0,0 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            # spec for file-sniffing functions
         
     | 
| 
       2 
     | 
    
         
            -
             
     | 
| 
       3 
     | 
    
         
            -
            require "spec_helper"
         
     | 
| 
       4 
     | 
    
         
            -
             
     | 
| 
       5 
     | 
    
         
            -
            describe "csvification" do
         
     | 
| 
       6 
     | 
    
         
            -
              it "should convert pipes to standard commas" do
         
     | 
| 
       7 
     | 
    
         
            -
                filename = File.dirname(__FILE__) + "/../data/pipe_data.txt"
         
     | 
| 
       8 
     | 
    
         
            -
                file = File.open(filename)
         
     | 
| 
       9 
     | 
    
         
            -
                strio = StringIO.new
         
     | 
| 
       10 
     | 
    
         
            -
                Masticate.csvify(file, :output => strio, :col_sep => '|')
         
     | 
| 
       11 
     | 
    
         
            -
                strio.close
         
     | 
| 
       12 
     | 
    
         
            -
                strio.string.lines.count.should == 5
         
     | 
| 
       13 
     | 
    
         
            -
              end
         
     | 
| 
       14 
     | 
    
         
            -
            end
         
     | 
    
        data/spec/spec/mend_spec.rb
    DELETED
    
    | 
         @@ -1,14 +0,0 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            # spec for file-sniffing functions
         
     | 
| 
       2 
     | 
    
         
            -
             
     | 
| 
       3 
     | 
    
         
            -
            require "spec_helper"
         
     | 
| 
       4 
     | 
    
         
            -
             
     | 
| 
       5 
     | 
    
         
            -
            describe "mending" do
         
     | 
| 
       6 
     | 
    
         
            -
              it "should merge lines when delimiter counts don't match'" do
         
     | 
| 
       7 
     | 
    
         
            -
                filename = File.dirname(__FILE__) + "/../data/broken_psv.txt"
         
     | 
| 
       8 
     | 
    
         
            -
                file = File.open(filename)
         
     | 
| 
       9 
     | 
    
         
            -
                devnull = File.open('/dev/null', 'w')
         
     | 
| 
       10 
     | 
    
         
            -
                results = Masticate.mend(file, :output => devnull, :col_sep => '|')
         
     | 
| 
       11 
     | 
    
         
            -
                results[:input_records].should == 6
         
     | 
| 
       12 
     | 
    
         
            -
                results[:output_records].should == 5
         
     | 
| 
       13 
     | 
    
         
            -
              end
         
     | 
| 
       14 
     | 
    
         
            -
            end
         
     |