flat_kit 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (68) hide show
  1. checksums.yaml +7 -0
  2. data/CONTRIBUTING.md +46 -0
  3. data/HISTORY.md +5 -0
  4. data/LICENSE.txt +21 -0
  5. data/Manifest.txt +66 -0
  6. data/README.md +80 -0
  7. data/Rakefile +20 -0
  8. data/bin/fk +4 -0
  9. data/lib/flat_kit.rb +23 -0
  10. data/lib/flat_kit/cli.rb +80 -0
  11. data/lib/flat_kit/command.rb +53 -0
  12. data/lib/flat_kit/command/cat.rb +93 -0
  13. data/lib/flat_kit/command/merge.rb +88 -0
  14. data/lib/flat_kit/command/sort.rb +88 -0
  15. data/lib/flat_kit/descendant_tracker.rb +27 -0
  16. data/lib/flat_kit/error.rb +5 -0
  17. data/lib/flat_kit/format.rb +34 -0
  18. data/lib/flat_kit/input.rb +32 -0
  19. data/lib/flat_kit/input/file.rb +53 -0
  20. data/lib/flat_kit/input/io.rb +54 -0
  21. data/lib/flat_kit/internal_node.rb +84 -0
  22. data/lib/flat_kit/jsonl.rb +8 -0
  23. data/lib/flat_kit/jsonl/format.rb +25 -0
  24. data/lib/flat_kit/jsonl/reader.rb +30 -0
  25. data/lib/flat_kit/jsonl/record.rb +84 -0
  26. data/lib/flat_kit/jsonl/writer.rb +45 -0
  27. data/lib/flat_kit/leaf_node.rb +71 -0
  28. data/lib/flat_kit/logger.rb +39 -0
  29. data/lib/flat_kit/merge.rb +35 -0
  30. data/lib/flat_kit/merge_tree.rb +104 -0
  31. data/lib/flat_kit/output.rb +32 -0
  32. data/lib/flat_kit/output/file.rb +55 -0
  33. data/lib/flat_kit/output/io.rb +73 -0
  34. data/lib/flat_kit/reader.rb +61 -0
  35. data/lib/flat_kit/record.rb +83 -0
  36. data/lib/flat_kit/sentinel_internal_node.rb +37 -0
  37. data/lib/flat_kit/sentinel_leaf_node.rb +37 -0
  38. data/lib/flat_kit/sort.rb +35 -0
  39. data/lib/flat_kit/writer.rb +38 -0
  40. data/lib/flat_kit/xsv.rb +8 -0
  41. data/lib/flat_kit/xsv/format.rb +25 -0
  42. data/lib/flat_kit/xsv/reader.rb +45 -0
  43. data/lib/flat_kit/xsv/record.rb +90 -0
  44. data/lib/flat_kit/xsv/writer.rb +70 -0
  45. data/tasks/default.rake +242 -0
  46. data/tasks/extension.rake +38 -0
  47. data/tasks/man.rake +7 -0
  48. data/tasks/this.rb +208 -0
  49. data/test/device_dataset.rb +117 -0
  50. data/test/input/test_file.rb +73 -0
  51. data/test/input/test_io.rb +93 -0
  52. data/test/jsonl/test_format.rb +22 -0
  53. data/test/jsonl/test_reader.rb +49 -0
  54. data/test/jsonl/test_record.rb +61 -0
  55. data/test/jsonl/test_writer.rb +68 -0
  56. data/test/output/test_file.rb +60 -0
  57. data/test/output/test_io.rb +104 -0
  58. data/test/test_conversions.rb +45 -0
  59. data/test/test_format.rb +24 -0
  60. data/test/test_helper.rb +26 -0
  61. data/test/test_merge.rb +40 -0
  62. data/test/test_merge_tree.rb +64 -0
  63. data/test/test_version.rb +11 -0
  64. data/test/xsv/test_format.rb +22 -0
  65. data/test/xsv/test_reader.rb +61 -0
  66. data/test/xsv/test_record.rb +69 -0
  67. data/test/xsv/test_writer.rb +68 -0
  68. metadata +237 -0
@@ -0,0 +1,38 @@
1
+ # To be used if the gem has extensions.
2
+ # If this task set is included then you will need to also have
3
+ #
4
+ # spec.add_development_dependency( 'rake-compiler', '~> 0.8.1' )
5
+ #
6
+ # in your top level rakefile
7
+ begin
8
+ require 'rake/extensiontask'
9
+ require 'rake/javaextensiontask'
10
+
11
+ if RUBY_PLATFORM == "java" then
12
+
13
+ Rake::JavaExtensionTask.new( This.name) do |ext|
14
+ ext.ext_dir = File.join( 'ext', This.name, "java" )
15
+ ext.lib_dir = File.join( 'lib', This.name )
16
+ ext.gem_spec = This.java_gemspec
17
+ end
18
+
19
+ else
20
+
21
+ Rake::ExtensionTask.new( This.name ) do |ext|
22
+ ext.ext_dir = File.join( 'ext', This.name, "c" )
23
+ ext.lib_dir = File.join( 'lib', This.name )
24
+ ext.gem_spec = This.ruby_gemspec
25
+
26
+ ext.cross_compile = true # enable cross compilation (requires cross compile toolchain)
27
+ ext.cross_platform = %w[x86-mingw32 x64-mingw32] # forces the Windows platform instead of the default one
28
+ # configure options only for cross compile
29
+ end
30
+ end
31
+
32
+ task :test_requirements => :compile
33
+ rescue LoadError
34
+ This.task_warning( 'extension' )
35
+ end
36
+
37
+ CLOBBER << FileList["lib/**/*.{jar,so,bundle}"]
38
+ CLOBBER << FileList["lib/#{This.name}/{1,2}.*/"]
@@ -0,0 +1,7 @@
1
+ #------------------------------------------------------------------------------
2
+ # man pages
3
+ #------------------------------------------------------------------------------
4
+ desc "Create the man pages"
5
+ task :man do
6
+ sh "ronn --roff #{FileList["man/*.ronn"]}"
7
+ end
@@ -0,0 +1,208 @@
1
+ require 'pathname'
2
+
3
+ # Public: A Class containing all the metadata and utilities needed to manage a
4
+ # ruby project.
5
+ class ThisProject
6
+ # The name of this project
7
+ attr_accessor :name
8
+
9
+ # The author's name
10
+ attr_accessor :author
11
+
12
+ # The email address of the author(s)
13
+ attr_accessor :email
14
+
15
+ # The homepage of this project
16
+ attr_accessor :homepage
17
+
18
+ # The regex of files to exclude from the manifest
19
+ attr_accessor :exclude_from_manifest
20
+
21
+ # The hash of Gem::Specifications keyed by platform
22
+ attr_accessor :gemspecs
23
+
24
+ # Public: Initialize ThisProject
25
+ #
26
+ # Yields self
27
+ def initialize(&block)
28
+ @exclude_from_manifest = Regexp.union(/\.(git|DS_Store)/,
29
+ /^(doc|coverage|pkg|tmp|Gemfile(\.lock)?)/,
30
+ /^[^\/]+\.gemspec/,
31
+ /\.(swp|jar|bundle|so|rvmrc|travis.yml|byebug_history|fossa.yml|ruby-version)$/,
32
+ /~$/)
33
+ @gemspecs = Hash.new
34
+ yield self if block_given?
35
+ end
36
+
37
+ # Public: return the version of ThisProject
38
+ #
39
+ # Search the ruby files in the project looking for the one that has the
40
+ # version string in it. This does not eval any code in the project, it parses
41
+ # the source code looking for the string.
42
+ #
43
+ # Returns a String version
44
+ def version
45
+ [ "lib/#{ name }.rb", "lib/#{ name }/version.rb" ].each do |v|
46
+ path = project_path( v )
47
+ line = path.read[/^\s*VERSION\s*=\s*.*/]
48
+ if line then
49
+ return line.match(/.*VERSION\s*=\s*['"](.*)['"]/)[1]
50
+ end
51
+ end
52
+ end
53
+
54
+ # Internal: Return a section of an RDoc file with the given section name
55
+ #
56
+ # file - the relative path in the project of the file to parse
57
+ # section_name - the section out of the file from which to parse data
58
+ #
59
+ # Returns the text of the section as an array of paragraphs.
60
+ def section_of( file, section_name )
61
+ re = /^[=#]+ (.*)$/
62
+ sectional = project_path( file )
63
+ parts = sectional.read.split( re )[1..-1]
64
+ parts.map! { |p| p.strip }
65
+
66
+ sections = Hash.new
67
+ Hash[*parts].each do |k,v|
68
+ sections[k] = v.split("\n\n")
69
+ end
70
+ return sections[section_name]
71
+ end
72
+
73
+ # Internal: print out a warning about the given task
74
+ def task_warning( task )
75
+ warn "WARNING: '#{task}' tasks are not defined. Please run 'rake develop'"
76
+ end
77
+
78
+ # Internal: Return the full path to the file that is relative to the project
79
+ # root.
80
+ #
81
+ # path - the relative path of the file from the project root
82
+ #
83
+ # Returns the Pathname of the file
84
+ def project_path( *relative_path )
85
+ project_root.join( *relative_path )
86
+ end
87
+
88
+ # Internal: The absolute path of this file
89
+ #
90
+ # Returns the Pathname of this file.
91
+ def this_file_path
92
+ Pathname.new( __FILE__ ).expand_path
93
+ end
94
+
95
+ # Internal: The root directory of this project
96
+ #
97
+ # This is defined as being the directory that is in the path of this project
98
+ # that has the first Rakefile
99
+ #
100
+ # Returns the Pathname of the directory
101
+ def project_root
102
+ this_file_path.ascend do |p|
103
+ rakefile = p.join( 'Rakefile' )
104
+ return p if rakefile.exist?
105
+ end
106
+ end
107
+
108
+ # Internal: Returns the contents of the Manifest.txt file as an array
109
+ #
110
+ # Returns an Array of strings
111
+ def manifest
112
+ manifest_file = project_path( "Manifest.txt" )
113
+ abort "You need a Manifest.txt" unless manifest_file.readable?
114
+ manifest_file.readlines.map { |l| l.strip }
115
+ end
116
+
117
+ # Internal: Return the files that define the extensions
118
+ #
119
+ # Returns an Array
120
+ def extension_conf_files
121
+ manifest.grep( /extconf.rb\Z/ )
122
+ end
123
+
124
+ # Internal: Returns the gemspec associated with the current ruby platform
125
+ def platform_gemspec
126
+ gemspecs.fetch(platform) { This.ruby_gemspec }
127
+ end
128
+
129
+ def core_gemspec
130
+ Gem::Specification.new do |spec|
131
+ spec.name = name
132
+ spec.version = version
133
+ spec.author = author
134
+ spec.email = email
135
+ spec.homepage = homepage
136
+
137
+ spec.summary = summary
138
+ spec.description = description
139
+ spec.license = license
140
+
141
+ spec.files = manifest
142
+ spec.executables = spec.files.grep(/^bin/) { |f| File.basename(f) }
143
+ spec.test_files = spec.files.grep(/^(spec|test)/)
144
+
145
+ spec.extra_rdoc_files += spec.files.grep(/(txt|rdoc|md)$/)
146
+ spec.rdoc_options = [ "--main" , 'README.md',
147
+ "--markup", "tomdoc" ]
148
+
149
+ spec.required_ruby_version = '>= 2.2.2'
150
+ end
151
+ end
152
+
153
+ # Internal: Return the gemspec for the ruby platform
154
+ def ruby_gemspec( core = core_gemspec, &block )
155
+ yielding_gemspec( 'ruby', core, &block )
156
+ end
157
+
158
+ # Internal: Return the gemspec for the jruby platform
159
+ def java_gemspec( core = core_gemspec, &block )
160
+ yielding_gemspec( 'java', core, &block )
161
+ end
162
+
163
+ # Internal: give an initial spec and a key, create a new gemspec based off of
164
+ # it.
165
+ #
166
+ # This will force the new gemspecs 'platform' to be that of the key, since the
167
+ # only reason you would have multiple gemspecs at this point is to deal with
168
+ # different platforms.
169
+ def yielding_gemspec( key, core )
170
+ spec = gemspecs[key] ||= core.dup
171
+ spec.platform = key
172
+ yield spec if block_given?
173
+ return spec
174
+ end
175
+
176
+ # Internal: Return the platform of ThisProject at the current moment in time.
177
+ def platform
178
+ (RUBY_PLATFORM == "java") ? 'java' : Gem::Platform::RUBY
179
+ end
180
+
181
+ # Internal: Return the DESCRIPTION section of the README.md file
182
+ def description_section
183
+ section_of( 'README.md', 'DESCRIPTION')
184
+ end
185
+
186
+ # Internal: Return the summary text from the README
187
+ def summary
188
+ description_section.first
189
+ end
190
+
191
+ # Internal: Return the full description text from the README
192
+ def description
193
+ description_section.join(" ").tr("\n", ' ').gsub(/[{}]/,'').gsub(/\[[^\]]+\]/,'') # strip rdoc
194
+ end
195
+
196
+ def license
197
+ license_file = project_path("LICENSE.txt")
198
+ line = license_file.readlines.first
199
+ line.split(/\s+/).first
200
+ end
201
+
202
+ # Internal: The path to the gemspec file
203
+ def gemspec_file
204
+ project_path( "#{ name }.gemspec" )
205
+ end
206
+ end
207
+
208
+ This = ThisProject.new
@@ -0,0 +1,117 @@
1
+ class DeviceDataset
2
+
3
+ include TestHelper
4
+
5
+ attr_reader :count
6
+ attr_reader :compare_fields
7
+ attr_reader :fields
8
+
9
+ attr_reader :filename_jsonl
10
+ attr_reader :filename_sorted_jsonl
11
+ attr_reader :filename_csv
12
+ attr_reader :filename_sorted_csv
13
+
14
+ def initialize(count:, compare_fields: [ "manufacturer", "model_name", "slug" ])
15
+ @count = count
16
+ @compare_fields = compare_fields
17
+ @fields = %w[
18
+ build_number
19
+ manufacturer
20
+ model_name
21
+ platform
22
+ serial
23
+ slug
24
+ ]
25
+ @filename_sorted_jsonl = nil
26
+ @filename_jsonl = nil
27
+ @filename_sorted_csv = nil
28
+ @filename_csv = nil
29
+ @slug = generate_slug
30
+ end
31
+
32
+ def persist_records_as_jsonl
33
+ @filename_jsonl = scratch_file(prefix: "unsorted_", slug: @slug)
34
+ @filename_jsonl.open("w+") do |f|
35
+ f.write(records_as_jsonl)
36
+ end
37
+ end
38
+
39
+ def persist_sorted_records_as_jsonl
40
+ @filename_sorted_jsonl = scratch_file(prefix: "sorted_", slug: @slug)
41
+ @filename_sorted_jsonl.open("w+") do |f|
42
+ f.write(sorted_records_as_jsonl)
43
+ end
44
+ end
45
+
46
+ def cleanup_files
47
+ [ @filename_sorted_jsonl, @filename_jsonl, @filename_sorted_csv, @filename_csv ].each do |p|
48
+ next if p.nil?
49
+ p.unlink if p.exist?
50
+ end
51
+ end
52
+
53
+ def records
54
+ @records ||= Array.new.tap do |a|
55
+ count.times do
56
+ a << Hash.new.tap do |h|
57
+ fields.each do |f|
58
+ value = (f == 'slug') ? generate_slug : ::Faker::Device.send(f)
59
+ h[f] = value
60
+ end
61
+ end
62
+ end
63
+ end
64
+ end
65
+
66
+ def sorted_records
67
+ @sorted_records ||= records.sort_by do |r|
68
+ compare_fields.map { |field| r[field] }
69
+ end
70
+ end
71
+
72
+ def records_as_jsonl
73
+ @jsonl_records ||= as_jsonl(list: records)
74
+ end
75
+
76
+ def records_as_csv
77
+ @csv_records ||= as_csv(list: records)
78
+ end
79
+
80
+ def records_as_csv_rows
81
+ @csv_rows ||= as_csv_rows(records_as_csv)
82
+ end
83
+
84
+ def sorted_records_as_jsonl
85
+ @jsonl_sorted_records ||= as_jsonl(list: sorted_records)
86
+ end
87
+
88
+ def sorted_records_as_csv
89
+ @csv_sorted_records ||= as_csv(list: sorted_records)
90
+ end
91
+
92
+ def sorted_records_as_csv_rows
93
+ @csv_sorted_rows ||= as_csv_rows(sorted_records_as_csv)
94
+ end
95
+
96
+ private
97
+
98
+ def as_jsonl(list:)
99
+ list.map { |r| Oj.dump(r) }.join("\n") + "\n"
100
+ end
101
+
102
+ def as_csv(list:, headers: fields)
103
+ CSV.generate('', headers: headers , write_headers: true) do |csv|
104
+ list.each do |r|
105
+ csv << fields.map { |f| r[f] }
106
+ end
107
+ end
108
+ end
109
+
110
+ def as_csv_rows(text)
111
+ Array.new.tap do |a|
112
+ CSV.new(text, converters: :numeric, headers: :first_row, return_headers: false).each do |row|
113
+ a << row
114
+ end
115
+ end
116
+ end
117
+ end
@@ -0,0 +1,73 @@
1
+ require_relative '../test_helper'
2
+
3
+ module TestInput
4
+ class TestFile < ::Minitest::Test
5
+ def test_does_not_handle_stdin_text
6
+ ::FlatKit::Input::IO::STDINS.each do |e|
7
+ refute(::FlatKit::Input::File.handles?(e), "#{e} is not stdin text")
8
+ end
9
+ end
10
+
11
+ def test_handles_existing_file
12
+ test_path = "tmp/test_handles_existing_file.txt"
13
+ begin
14
+ IO.write(test_path,"test handles existing file")
15
+ assert(::FlatKit::Input::File.handles?(test_path))
16
+ ensure
17
+ File.unlink(test_path) if File.exist?(test_path)
18
+ end
19
+ end
20
+
21
+ def test_only_handles_string
22
+ refute(::FlatKit::Input::File.handles?(Object.new))
23
+ end
24
+
25
+ def test_raises_error_if_not_readable
26
+ assert_raises(FlatKit::Error) { ::FlatKit::Input::File.new("tmp/does-not-exist") }
27
+ end
28
+
29
+ def test_init_from_path
30
+ test_path = "tmp/test_init_from_path.txt"
31
+ begin
32
+ IO.write(test_path,"nothing to see here")
33
+ io = ::FlatKit::Input::File.new(test_path)
34
+ assert_equal(test_path, io.name)
35
+ assert_instance_of(::File, io.io)
36
+ ensure
37
+ File.unlink(test_path) if File.exist?(test_path)
38
+ end
39
+ end
40
+
41
+ def test_reads_from_file
42
+ test_path = "tmp/test_reads_from_file.txt"
43
+ begin
44
+ text = "test_reads_from_file"
45
+ IO.write(test_path,text)
46
+
47
+ input = ::FlatKit::Input::File.new(test_path)
48
+ content = input.io.read
49
+ assert_equal(text, content)
50
+
51
+ input.close
52
+ ensure
53
+ File.unlink(test_path) if File.exist?(test_path)
54
+ end
55
+ end
56
+
57
+ def test_reads_from_gzfile
58
+ test_path = "tmp/test_reads_from_gzfile.txt.gz"
59
+ begin
60
+ text = "this is something to read"
61
+ system("echo '#{text}' | gzip > #{test_path}")
62
+
63
+ input = ::FlatKit::Input::File.new(test_path)
64
+ content = input.io.read
65
+ assert_equal(text + "\n", content)
66
+
67
+ input.close
68
+ ensure
69
+ File.unlink(test_path) if File.exist?(test_path)
70
+ end
71
+ end
72
+ end
73
+ end
@@ -0,0 +1,93 @@
1
+ require_relative '../test_helper'
2
+
3
+ module TestInput
4
+ class NullIO < ::IO
5
+ def initialize()
6
+ end
7
+ end
8
+
9
+ class TestIO < ::Minitest::Test
10
+
11
+ def test_handles_stdin_text
12
+ ::FlatKit::Input::IO::STDINS.each do |e|
13
+ assert(::FlatKit::Input::IO.handles?(e), "#{e} is not stdin text")
14
+ end
15
+ end
16
+
17
+ def test_handles_stdin_io
18
+ x = $stdin
19
+ assert(::FlatKit::Input::IO.handles?(x), "is not stdin")
20
+ end
21
+
22
+ def test_handles_stringio
23
+ assert(::FlatKit::Input::IO.handles?(StringIO.new))
24
+ end
25
+
26
+ def test_does_not_handle_other
27
+ x = Object.new
28
+ refute(::FlatKit::Input::IO.handles?(x))
29
+ end
30
+
31
+ def test_init_from_dash
32
+ io = ::FlatKit::Input::IO.new("-")
33
+ assert_equal("<STDIN>", io.name)
34
+ assert_equal(::STDIN, io.io)
35
+ end
36
+
37
+ def test_init_from_file_object
38
+ test_path = "tmp/test_init_from_file_object.txt"
39
+ begin
40
+ File.open(test_path, "w+") do |f|
41
+ io = ::FlatKit::Input::IO.new(f)
42
+ assert_equal(test_path, io.name)
43
+ assert_instance_of(::File, io.io)
44
+ end
45
+ ensure
46
+ File.unlink(test_path) if File.exist?(test_path)
47
+ end
48
+ end
49
+
50
+ def test_init_from_string_io_object
51
+ sio = StringIO.new
52
+ io = ::FlatKit::Input::IO.new(sio)
53
+ assert_match(/StringIO/, io.name)
54
+ assert_instance_of(::StringIO, io.io)
55
+ end
56
+
57
+ def test_init_from_io_object
58
+ null_io = NullIO.new
59
+ io = ::FlatKit::Input::IO.new(null_io)
60
+ assert_match(/NullIO/, io.name)
61
+ assert_instance_of(::TestInput::NullIO, io.io)
62
+ end
63
+
64
+ def test_init_from_stdin
65
+ io = ::FlatKit::Input::IO.new($stdin)
66
+ assert_equal("<STDIN>", io.name)
67
+ assert_equal(::STDIN, io.io)
68
+ end
69
+
70
+ def test_init_from_invalid
71
+ assert_raises(::FlatKit::Error) { ::FlatKit::Input::IO.new(Object.new) }
72
+ end
73
+
74
+ def test_reads_from_io
75
+ test_path = "tmp/test_reads_from_io.txt"
76
+ begin
77
+ line = "This is a line to read"
78
+ File.open(test_path, "w+") do |f|
79
+ f.write(line)
80
+ f.rewind
81
+
82
+ io = ::FlatKit::Input::IO.new(f)
83
+ content = io.io.read
84
+ io.close
85
+ assert_equal(content, line)
86
+ end
87
+ ensure
88
+ File.unlink(test_path) if File.exist?(test_path)
89
+ end
90
+ end
91
+
92
+ end
93
+ end