masticate 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,18 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ .rspec
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in masticate.gemspec
4
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 Jason May
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,29 @@
1
+ # Masticate
2
+
3
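+ Masticate provides utility functions for parsing incoming text data files, starting with a sniffer that detects a file's column delimiter.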
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'masticate'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install masticate
18
+
19
+ ## Usage
20
+
21
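+ Call `Masticate.sniff(file)` with an open file; it returns a hash with the detected `:col_sep` and the distinct per-line `:field_counts`.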
22
+
23
+ ## Contributing
24
+
25
+ 1. Fork it
26
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
27
+ 3. Commit your changes (`git commit -am 'Added some feature'`)
28
+ 4. Push to the branch (`git push origin my-new-feature`)
29
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
3
+
4
+ require 'rspec/core/rake_task'
5
+
6
+ RSpec::Core::RakeTask.new(:spec)
7
+
8
+ # task :default => :spec
@@ -0,0 +1,40 @@
1
module Masticate
  # Guesses the column delimiter of a delimited text file and summarises how
  # many fields its lines contain.
  #
  # The wrapped +file+ only needs to respond to +each_line+ and +rewind+
  # (e.g. File or StringIO).
  class Sniffer
    # Delimiters tried when sniffing. Earlier entries win ties, so a file
    # containing none of them is reported as comma-delimited.
    CandidateDelimiters = [',', '|', "\t"].freeze

    attr_reader :file
    attr_reader :col_sep

    # file - an open, readable IO-like object.
    def initialize(file)
      @file = file
    end

    # Convenience wrapper: builds a Sniffer for +file+ and runs #sniff.
    def self.sniff(file)
      new(file).sniff
    end

    # Detects the delimiter and gathers field statistics.
    #
    # Returns a Hash with:
    #   :col_sep      - the detected delimiter String
    #   :field_counts - Array of the distinct field counts seen across lines
    def sniff
      @col_sep = find_col_sep
      {
        :col_sep => col_sep,
        :field_counts => stats
      }
    end

    # Picks the candidate delimiter that occurs most often in the first line
    # read from the file, then rewinds the file.
    #
    # An empty file is treated as an empty first line rather than raising.
    def find_col_sep
      line1 = file.each_line.first.to_s
      file.rewind # reset file pointer for the next reader
      # max_by keeps the first maximum, so ties resolve deterministically in
      # CandidateDelimiters order.
      CandidateDelimiters.max_by { |delim| consider_delim(line1, delim) }
    end

    # Scores +delim+ for +line+: the number of times it occurs.
    # String#count treats its argument as a character set, which is fine for
    # the single-character candidates used here.
    def consider_delim(line, delim)
      line.count(delim)
    end

    # Returns the distinct field counts found across all lines, splitting on
    # the detected delimiter, and rewinds the file afterwards.
    def stats
      # A limit of -1 keeps trailing empty fields, so a final line without a
      # newline is counted the same way as the lines before it.
      counts = file.each_line.map { |line| line.split(col_sep, -1).size }.uniq
      file.rewind
      counts
    end
  end
end
@@ -0,0 +1,3 @@
1
module Masticate
  # Gem version string, read by masticate.gemspec. Frozen so the constant
  # cannot be mutated in place at runtime.
  VERSION = "0.0.1".freeze
end
data/lib/masticate.rb ADDED
@@ -0,0 +1,8 @@
1
+ require "masticate/version"
2
+ require "masticate/sniffer"
3
+
4
# Top-level entry points for the gem.
module Masticate
  class << self
    # Sniffs +file+ (an open, readable IO-like object) and returns the
    # Sniffer result hash (:col_sep and :field_counts).
    def sniff(file)
      Sniffer.sniff(file)
    end
  end
end
data/masticate.gemspec ADDED
@@ -0,0 +1,20 @@
1
# -*- encoding: utf-8 -*-
require File.expand_path('../lib/masticate/version', __FILE__)

# Gem specification for masticate.
Gem::Specification.new do |gem|
  gem.authors       = ["Jason May"]
  gem.email         = ["jmay@pobox.com"]
  gem.description   = %q{Data file crunching}
  gem.summary       = %q{Utility functions for parsing incoming text data files.}
  gem.homepage      = ""
  # Matches the bundled LICENSE file.
  gem.license       = "MIT"
  gem.rubyforge_project = "masticate"

  # Split on newlines explicitly: $\ (the output record separator) is nil by
  # default, which makes split break on any whitespace and mangles paths that
  # contain spaces.
  gem.files         = `git ls-files`.split("\n")
  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.name          = "masticate"
  gem.require_paths = ["lib"]
  gem.version       = Masticate::VERSION

  gem.add_development_dependency "rspec"
end
@@ -0,0 +1,5 @@
1
+ COL1|COL 2|Col 3 |col-4| col5 |col6
2
+ data| data |data |d a t a|data|data
3
+ data| data |data |d a t a|data|data
4
+ data| data |data |d a t a|data|data
5
+ data| data |data |d a t a|data|data
@@ -0,0 +1,5 @@
1
+ COL1 COL 2 Col 3 col-4 col5 col6
2
+ data data data d a t a data data
3
+ data data data d a t a data data
4
+ data data data d a t a data data
5
+ data data data d a t a data data
@@ -0,0 +1,21 @@
1
+ # spec for file-sniffing functions
2
+
3
+ require "spec_helper"
4
+
5
# Exercises Masticate.sniff against the fixtures in spec/data.
describe "delimiter sniffing" do
  it "should find tab delimiter" do
    filename = File.dirname(__FILE__) + "/../data/tabbed_data.txt"
    # Block form closes the fixture handle even if sniffing raises.
    results = File.open(filename) { |file| Masticate.sniff(file) }
    results[:col_sep].should == "\t"
    results[:field_counts].should == [6]
  end

  it "should find pipe delimiter" do
    filename = File.dirname(__FILE__) + "/../data/pipe_data.txt"
    # Block form closes the fixture handle even if sniffing raises.
    results = File.open(filename) { |file| Masticate.sniff(file) }
    results[:col_sep].should == '|'
    results[:field_counts].should == [6]
  end
end
@@ -0,0 +1,34 @@
1
+ # require File.expand_path("../../config/environment", __FILE__)
2
+ # require 'rspec/rails'
3
+ require 'rspec/autorun'
4
+ # require 'capybara/rspec'
5
+
6
+ # Requires supporting ruby files with custom matchers and macros, etc,
7
+ # in spec/support/ and its subdirectories.
8
+ # Dir[Rails.root.join("spec/support/**/*.rb")].each {|f| require f}
9
+
10
+ require File.expand_path('../../lib/masticate', __FILE__)
11
+
12
+ RSpec.configure do |config|
13
+ # ## Mock Framework
14
+ #
15
+ # If you prefer to use mocha, flexmock or RR, uncomment the appropriate line:
16
+ #
17
+ # config.mock_with :mocha
18
+ # config.mock_with :flexmock
19
+ # config.mock_with :rr
20
+
21
+ # Remove this line if you're not using ActiveRecord or ActiveRecord fixtures
22
+ # config.fixture_path = "#{::Rails.root}/spec/fixtures"
23
+
24
+ # If you're not using ActiveRecord, or you'd prefer not to run each of your
25
+ # examples within a transaction, remove the following line or assign false
26
+ # instead of true.
27
+ # config.use_transactional_fixtures = true
28
+ # config.use_instantiated_fixtures = false
29
+
30
+ # If true, the base class of anonymous controllers will be inferred
31
+ # automatically. This will be the default behavior in future versions of
32
+ # rspec-rails.
33
+ # config.infer_base_class_for_anonymous_controllers = false
34
+ end
metadata ADDED
@@ -0,0 +1,74 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: masticate
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Jason May
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-04-03 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rspec
16
+ requirement: &2153533260 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: *2153533260
25
+ description: Data file crunching
26
+ email:
27
+ - jmay@pobox.com
28
+ executables: []
29
+ extensions: []
30
+ extra_rdoc_files: []
31
+ files:
32
+ - .gitignore
33
+ - Gemfile
34
+ - LICENSE
35
+ - README.md
36
+ - Rakefile
37
+ - lib/masticate.rb
38
+ - lib/masticate/sniffer.rb
39
+ - lib/masticate/version.rb
40
+ - masticate.gemspec
41
+ - spec/data/pipe_data.txt
42
+ - spec/data/tabbed_data.txt
43
+ - spec/spec/sniffer_spec.rb
44
+ - spec/spec_helper.rb
45
+ homepage: ''
46
+ licenses: []
47
+ post_install_message:
48
+ rdoc_options: []
49
+ require_paths:
50
+ - lib
51
+ required_ruby_version: !ruby/object:Gem::Requirement
52
+ none: false
53
+ requirements:
54
+ - - ! '>='
55
+ - !ruby/object:Gem::Version
56
+ version: '0'
57
+ required_rubygems_version: !ruby/object:Gem::Requirement
58
+ none: false
59
+ requirements:
60
+ - - ! '>='
61
+ - !ruby/object:Gem::Version
62
+ version: '0'
63
+ requirements: []
64
+ rubyforge_project: masticate
65
+ rubygems_version: 1.8.10
66
+ signing_key:
67
+ specification_version: 3
68
+ summary: Utility functions for parsing incoming text data files.
69
+ test_files:
70
+ - spec/data/pipe_data.txt
71
+ - spec/data/tabbed_data.txt
72
+ - spec/spec/sniffer_spec.rb
73
+ - spec/spec_helper.rb
74
+ has_rdoc: