textfile 1.0.0.rc1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ YWRjZWIwY2JmZDg5Zjg4N2RiOGU4OWJmZGNkZDdjNjI4ZDc4NzI2NA==
5
+ data.tar.gz: !binary |-
6
+ ODViYmEzN2Y1Zjg1YjI2NzM0OWU0YTI0NzU2YWU2YTllZWM3YWFmMw==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ OGJlYjNhYTZhNWY5NGE5YzUxYzU3N2Y4YzM1NGRmYzczNDFmNDA0NDNlOWE4
10
+ ZGUxMWZhNDgxYmU0YjMwZjJhMjAxNWI5MmI3ODhlYWVkNWViMzNiNzA4M2Mx
11
+ OTY2ZGJjNzNmYjI2ZjhlYTkyNjk1MmVkNWU5MmRhYzRlYzZjNTQ=
12
+ data.tar.gz: !binary |-
13
+ NWEyMGFjZjA3OWM0YWE3MmMxZDUwMjQ1YjliZGMwYjEyYTFlNzg3NGY3OWU3
14
+ NzNiNTBiNGE5YWE4NTYxYTMxYTdlZDgyNmQ0NTI1ZGJjNjA0ODEzZjkxOTgx
15
+ NGVjZWUzNjVhMjNmMTQ3ZTJkNzg4MmQ0ZGQ1YjBjMjNhN2ZkMmE=
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ lib/**/*.rb
2
+ bin/*
3
+ -
4
+ features/**/*.feature
5
+ LICENSE.txt
data/Gemfile ADDED
@@ -0,0 +1,14 @@
1
# Fetch gems over TLS so they cannot be tampered with in transit.
source "https://rubygems.org"
# Add dependencies required to use your gem here.
# Example:
#   gem "activesupport", ">= 2.3.5"

# Add dependencies to develop your gem here.
# Include everything needed to run rake, tests, features, etc.
group :development do
  gem "shoulda", ">= 0"
  gem "rdoc", "~> 3.12"
  gem "bundler", "~> 1.0"
  gem "jeweler", "~> 2.0.1"
  gem "simplecov", ">= 0"
end
data/Gemfile.lock ADDED
@@ -0,0 +1,80 @@
1
+ GEM
2
+ remote: http://rubygems.org/
3
+ specs:
4
+ activesupport (4.1.2)
5
+ i18n (~> 0.6, >= 0.6.9)
6
+ json (~> 1.7, >= 1.7.7)
7
+ minitest (~> 5.1)
8
+ thread_safe (~> 0.1)
9
+ tzinfo (~> 1.1)
10
+ addressable (2.3.6)
11
+ builder (3.2.2)
12
+ descendants_tracker (0.0.4)
13
+ thread_safe (~> 0.3, >= 0.3.1)
14
+ docile (1.1.5)
15
+ faraday (0.9.0)
16
+ multipart-post (>= 1.2, < 3)
17
+ git (1.2.7)
18
+ github_api (0.11.3)
19
+ addressable (~> 2.3)
20
+ descendants_tracker (~> 0.0.1)
21
+ faraday (~> 0.8, < 0.10)
22
+ hashie (>= 1.2)
23
+ multi_json (>= 1.7.5, < 2.0)
24
+ nokogiri (~> 1.6.0)
25
+ oauth2
26
+ hashie (3.1.0)
27
+ highline (1.6.21)
28
+ i18n (0.6.9)
29
+ jeweler (2.0.1)
30
+ builder
31
+ bundler (>= 1.0)
32
+ git (>= 1.2.5)
33
+ github_api
34
+ highline (>= 1.6.15)
35
+ nokogiri (>= 1.5.10)
36
+ rake
37
+ rdoc
38
+ json (1.8.1)
39
+ jwt (1.0.0)
40
+ mini_portile (0.6.0)
41
+ minitest (5.3.5)
42
+ multi_json (1.10.1)
43
+ multi_xml (0.5.5)
44
+ multipart-post (2.0.0)
45
+ nokogiri (1.6.2.1)
46
+ mini_portile (= 0.6.0)
47
+ oauth2 (0.9.4)
48
+ faraday (>= 0.8, < 0.10)
49
+ jwt (~> 1.0)
50
+ multi_json (~> 1.3)
51
+ multi_xml (~> 0.5)
52
+ rack (~> 1.2)
53
+ rack (1.5.2)
54
+ rake (10.3.2)
55
+ rdoc (3.12.2)
56
+ json (~> 1.4)
57
+ shoulda (3.5.0)
58
+ shoulda-context (~> 1.0, >= 1.0.1)
59
+ shoulda-matchers (>= 1.4.1, < 3.0)
60
+ shoulda-context (1.2.1)
61
+ shoulda-matchers (2.6.1)
62
+ activesupport (>= 3.0.0)
63
+ simplecov (0.8.2)
64
+ docile (~> 1.1.0)
65
+ multi_json
66
+ simplecov-html (~> 0.8.0)
67
+ simplecov-html (0.8.0)
68
+ thread_safe (0.3.4)
69
+ tzinfo (1.2.1)
70
+ thread_safe (~> 0.1)
71
+
72
+ PLATFORMS
73
+ ruby
74
+
75
+ DEPENDENCIES
76
+ bundler (~> 1.0)
77
+ jeweler (~> 2.0.1)
78
+ rdoc (~> 3.12)
79
+ shoulda
80
+ simplecov
data/LICENSE.txt ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2014 Piers Chambers
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,24 @@
1
+ = textfile
2
+
3
+ Provides a Set-like wrapper around GNU comm and related textfile utilities.
4
+ Method names are inspired by Set, though this is not an Enumerable class.
5
+
6
+ On OS X see http://apple.stackexchange.com/questions/69223/how-to-replace-mac-os-x-utilities-with-gnu-core-utilities
7
+
8
+ A common use case is to identify differences between exported datasets where the datasets may exceed 100K rows and each row may exceed 4K characters.
9
+
10
+ == Contributing to textfile
11
+
12
+ * Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet.
13
+ * Check out the issue tracker to make sure someone hasn't already requested it and/or contributed it.
14
+ * Fork the project.
15
+ * Start a feature/bugfix branch.
16
+ * Commit and push until you are happy with your contribution.
17
+ * Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
18
+ * Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or changing them is otherwise necessary, that is fine, but please isolate the change in its own commit so I can cherry-pick around it.
19
+
20
+ == Copyright
21
+
22
+ Copyright (c) 2014 Piers Chambers. See LICENSE.txt for
23
+ further details.
24
+
data/Rakefile ADDED
@@ -0,0 +1,51 @@
1
# encoding: utf-8

require 'rubygems'
require 'bundler'

# Activate the bundled gems; bail out with Bundler's own exit status when
# they have not been installed yet.
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => e
  $stderr.puts e.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit e.status_code
end
require 'rake'

# Gem packaging and release tasks.
require 'jeweler'
Jeweler::Tasks.new do |gem|
  # gem is a Gem::Specification... see http://guides.rubygems.org/specification-reference/ for more options
  gem.name = "textfile"
  gem.homepage = "http://github.com/varyonic/textfile"
  gem.license = "MIT"
  gem.summary = %Q{Set-like wrapper around GNU comm and related textfile utilities.}
  gem.description = %Q{A common use case is to identify differences between exported datasets where the datasets may exceed 100K rows and each row may exceed 4K characters.}
  gem.email = "piers@varyonic.com"
  gem.authors = ["Piers Chambers"]
  # dependencies defined in Gemfile
end
Jeweler::RubygemsDotOrgTasks.new

# Unit tests: every test/**/test_*.rb file, with lib/ and test/ on the load path.
require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/test_*.rb'
  test.verbose = true
end

desc "Code coverage detail"
task :simplecov do
  # test/helper.rb only starts SimpleCov when COVERAGE is set.
  ENV['COVERAGE'] = "true"
  Rake::Task['test'].execute
end

task :default => :test

# API documentation built from the README and everything under lib/.
require 'rdoc/task'
Rake::RDocTask.new do |rdoc|
  # Strip surrounding whitespace so a trailing newline in VERSION does not
  # end up inside the generated title.
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "textfile #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 1.0.0.rc1
data/lib/textfile.rb ADDED
@@ -0,0 +1,85 @@
1
require 'logger'
require 'shellwords'
require 'tempfile'
3
+
4
# Set-like wrapper around the comm, sort and uniq command-line utilities.
#
# Every operation shells out and rewrites the file at +path+ in place.
# Method names are inspired by Set, though this is not an Enumerable class.
class Textfile
  attr_accessor :path, :logger

  # Wraps the text file at +path+.
  #
  # options
  # * +:bufsiz+ - Passed to GNU sort to optimize performance.
  # * +:debug+ - Suppress deletion of temp files.
  # * +:lang+ - Collation sequence.
  # * +:logger+ - Logs shell commands and resulting output (default: STDOUT).
  def initialize(path, options = {})
    # Must be @bufsiz: that is the name #sort reads. It used to be stored
    # under a misspelled name, so the option never reached sort.
    @bufsiz = options[:bufsiz]
    @debug  = options[:debug]
    @lang   = options[:lang]
    @logger = options[:logger] || Logger.new(STDOUT)
    @path   = path
  end

  # Removes all records. Returns self.
  def clear
    sh "cat /dev/null > #{shell_quote(@path)}"
  end

  # Removes records not present in other textfile. Returns self.
  # Both files are sorted (and de-duplicated) in place first.
  def intersection(textfile)
    comm(textfile, '-12')
  end

  # Merges the contents of other textfiles and returns self.
  # The result is sorted and de-duplicated.
  def merge(*textfiles)
    # A bare `cat` with no operands would block reading stdin, so only
    # append when there is something to append.
    unless textfiles.empty?
      sources = textfiles.map { |textfile| shell_quote(textfile.path) }.join(' ')
      sh "cat #{sources} >> #{shell_quote(@path)}"
      # Appending invalidates any earlier sort; without this reset #sort
      # would return early and leave the file unsorted with duplicates.
      @sorted = false
    end
    sort
  end

  # Remove records present in other textfile. Returns self.
  # Both files are sorted (and de-duplicated) in place first.
  def subtract(textfile)
    # --nocheck-order, see https://bugzilla.redhat.com/show_bug.cgi?id=1001775
    comm(textfile, '--nocheck-order -23')
  end

  protected

  # OS X comm can't handle lines > 2K bytes.
  # See http://apple.stackexchange.com/questions/69223/how-to-replace-mac-os-x-utilities-with-gnu-core-utilities
  def comm_cmd() (RUBY_PLATFORM =~ /darwin/ ? 'gcomm' : 'comm') end
  def sort_cmd() (RUBY_PLATFORM =~ /darwin/ ? 'gsort' : 'sort') end
  def uniq_cmd() (RUBY_PLATFORM =~ /darwin/ ? 'guniq' : 'uniq') end

  # Sorts both files, then replaces this file with the output of
  # comm +options+ run on (copy of this file, other file).
  def comm(textfile, options)
    sort
    textfile.sort
    with_tempcopy do |tempcopy|
      sh "#{comm_cmd} #{options} #{shell_quote(tempcopy)} " \
         "#{shell_quote(textfile.path)} > #{shell_quote(@path)}"
    end
  end

  # Runs +cmd+ through the shell, logging the command and its output.
  # A non-zero exit status is logged as an error rather than raised, so
  # callers that tolerated failures before keep working. Returns self.
  def sh(cmd)
    cmd = "export LC_COLLATE=#{shell_quote(@lang)}; #{cmd}" if @lang
    logger.info cmd
    output = %x[ #{cmd} ]
    status = $? # capture immediately, before anything else can spawn a process
    logger.info output
    logger.error "command failed (#{status}): #{cmd}" if status && !status.success?
    self
  end

  # Sorts file and removes any duplicate records.
  # Does nothing when the file is already known to be sorted. Returns self.
  def sort
    return self if sorted
    options = "--buffer-size=#{shell_quote(@bufsiz)}" if @bufsiz
    with_tempcopy do |tempcopy|
      sh "#{sort_cmd} #{options} #{shell_quote(tempcopy)} | #{uniq_cmd} > #{shell_quote(@path)}"
    end
    @sorted = true
    self
  end
  attr_accessor :sorted

  # Yields the path of a temporary copy of the file, so a command can read
  # the copy while redirecting its output over the original. Returns self.
  def with_tempcopy
    tempcopy = Tempfile.new(['temp-', '.txt'])
    begin
      tempcopy.binmode
      # Stream the bytes across instead of loading the whole file into memory.
      IO.copy_stream(@path, tempcopy)
      tempcopy.close
      yield tempcopy.path
    ensure
      # Clean up even when the copy or the block raised.
      tempcopy.close unless tempcopy.closed?
      tempcopy.unlink unless @debug
    end
    self
  end

  # Escapes +arg+ so the shell treats it as a single literal word
  # (paths containing spaces or metacharacters would otherwise be split).
  def shell_quote(arg)
    Shellwords.escape(arg.to_s)
  end
end
data/test/helper.rb ADDED
@@ -0,0 +1,32 @@
1
require 'simplecov'

# Give SimpleCov's configuration DSL a way to drop every filter it has
# accumulated so far.
SimpleCov::Configuration.module_eval do
  def clean_filters
    @filters = []
  end
end

# Start from an empty filter list, then apply the stock test-framework profile.
SimpleCov.configure do
  clean_filters
  load_profile 'test_frameworks'
end

# Coverage is collected only when the COVERAGE environment variable is set.
if ENV["COVERAGE"]
  SimpleCov.start { add_filter "/.rvm/" }
end

require 'rubygems'
require 'bundler'

# Activate the bundled gems, exiting with Bundler's status code if any are missing.
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => bundler_error
  $stderr.puts bundler_error.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit bundler_error.status_code
end

require "minitest/autorun"
require 'shoulda'

# Put test/ first and lib/ second on the load path, then load the library.
test_dir = File.dirname(__FILE__)
$LOAD_PATH.unshift(test_dir, File.join(test_dir, '..', 'lib'))
require 'textfile'
32
+
@@ -0,0 +1,67 @@
1
+ # encoding: utf-8
2
+ require 'helper'
3
+
4
# Exercises Textfile against small temporary files.
class TestTextfile < Minitest::Test
  # Writes +records+ (one per line) to a fresh temp file and returns a
  # Textfile wrapping it.
  def infile(*records)
    file = Tempfile.new(self.class.name)
    # Keep a reference for the lifetime of the test: a Tempfile that gets
    # garbage collected deletes its file, which would pull the fixture out
    # from under the Textfile.
    (@tempfiles ||= []) << file
    file.puts(records)
    file.close
    Textfile.new(file.path)
  end

  # Whitespace-separated records currently stored in +textfile+.
  def records_in(textfile)
    File.read(textfile.path).split
  end

  should "clear a file" do
    textfile = infile('3','2','1')

    textfile.clear

    assert_equal([], records_in(textfile))
  end

  should "find the intersection of two datasets" do
    tf1 = infile('3','2','1','b')
    tf2 = infile('c','b','a','2')

    tf1.intersection(tf2)

    assert_equal(['2','b'], records_in(tf1))
  end

  should "subtract one dataset from another" do
    tf1 = infile('3','2','1','b')
    tf2 = infile('c','b','a','2')

    tf1.subtract(tf2)

    assert_equal(['1','3'], records_in(tf1))
  end

  should "merge two datasets" do
    tf1 = infile('3','2','1','b')
    tf2 = infile('c','b','a','2')

    tf1.merge(tf2)

    assert_equal(['1','2','3','a','b','c'], records_in(tf1))
  end

  should "sort a simple file" do
    textfile = infile(['3','2','1'])

    textfile.send :sort

    assert_equal(['1', '2', '3'], records_in(textfile))
  end

  should "sort a file with very long records" do
    textfile = infile('3'*9999,'2','1')

    textfile.send :sort

    assert_equal(['1', '2', '3'*9999], records_in(textfile))
  end

  should "sort non-ASCII characters" do
    # The expected order below is byte-wise (C) collation, so request it
    # explicitly instead of depending on the locale of the machine.
    textfile = Textfile.new(infile('Muffler','MX Systems','Müller','MySQL').path, lang: 'C')

    textfile.send :sort

    contents = File.open(textfile.path, external_encoding: 'UTF-8', &:read)
    assert_equal(["MX Systems", "Muffler", "MySQL", "Müller"], contents.split("\n"))
  end
end
metadata ADDED
@@ -0,0 +1,126 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: textfile
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0.rc1
5
+ platform: ruby
6
+ authors:
7
+ - Piers Chambers
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-11-08 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: shoulda
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ! '>='
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ! '>='
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rdoc
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ~>
32
+ - !ruby/object:Gem::Version
33
+ version: '3.12'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ~>
39
+ - !ruby/object:Gem::Version
40
+ version: '3.12'
41
+ - !ruby/object:Gem::Dependency
42
+ name: bundler
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ~>
46
+ - !ruby/object:Gem::Version
47
+ version: '1.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ~>
53
+ - !ruby/object:Gem::Version
54
+ version: '1.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: jeweler
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: 2.0.1
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ~>
67
+ - !ruby/object:Gem::Version
68
+ version: 2.0.1
69
+ - !ruby/object:Gem::Dependency
70
+ name: simplecov
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ! '>='
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ! '>='
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ description: A common use case is to identify differences between exported datasets
84
+ where the datasets may exceed 100K rows and each row may exceed 4K characters.
85
+ email: piers@varyonic.com
86
+ executables: []
87
+ extensions: []
88
+ extra_rdoc_files:
89
+ - LICENSE.txt
90
+ - README.rdoc
91
+ files:
92
+ - .document
93
+ - Gemfile
94
+ - Gemfile.lock
95
+ - LICENSE.txt
96
+ - README.rdoc
97
+ - Rakefile
98
+ - VERSION
99
+ - lib/textfile.rb
100
+ - test/helper.rb
101
+ - test/test_textfile.rb
102
+ homepage: http://github.com/varyonic/textfile
103
+ licenses:
104
+ - MIT
105
+ metadata: {}
106
+ post_install_message:
107
+ rdoc_options: []
108
+ require_paths:
109
+ - lib
110
+ required_ruby_version: !ruby/object:Gem::Requirement
111
+ requirements:
112
+ - - ! '>='
113
+ - !ruby/object:Gem::Version
114
+ version: '0'
115
+ required_rubygems_version: !ruby/object:Gem::Requirement
116
+ requirements:
117
+ - - ! '>'
118
+ - !ruby/object:Gem::Version
119
+ version: 1.3.1
120
+ requirements: []
121
+ rubyforge_project:
122
+ rubygems_version: 2.1.11
123
+ signing_key:
124
+ specification_version: 4
125
+ summary: Set-like wrapper around GNU comm and related textfile utilities.
126
+ test_files: []