flat_kit 0.2.0 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CONTRIBUTING.md +1 -2
- data/HISTORY.md +15 -0
- data/Manifest.txt +21 -26
- data/{bin → exe}/fk +2 -1
- data/flat_kit.gemspec +33 -0
- data/lib/flat_kit/cli.rb +48 -23
- data/lib/flat_kit/command/cat.rb +34 -32
- data/lib/flat_kit/command/merge.rb +37 -36
- data/lib/flat_kit/command/sort.rb +37 -37
- data/lib/flat_kit/command/stats.rb +96 -0
- data/lib/flat_kit/command.rb +10 -10
- data/lib/flat_kit/descendant_tracker.rb +17 -5
- data/lib/flat_kit/error.rb +4 -0
- data/lib/flat_kit/event_emitter.rb +7 -4
- data/lib/flat_kit/field_stats.rb +246 -0
- data/lib/flat_kit/field_type/boolean_type.rb +52 -0
- data/lib/flat_kit/field_type/date_type.rb +181 -0
- data/lib/flat_kit/field_type/float_type.rb +43 -0
- data/lib/flat_kit/field_type/guess_type.rb +23 -0
- data/lib/flat_kit/field_type/integer_type.rb +36 -0
- data/lib/flat_kit/field_type/null_type.rb +39 -0
- data/lib/flat_kit/field_type/string_type.rb +24 -0
- data/lib/flat_kit/field_type/timestamp_type.rb +48 -0
- data/lib/flat_kit/field_type/unknown_type.rb +30 -0
- data/lib/flat_kit/field_type.rb +83 -0
- data/lib/flat_kit/format.rb +11 -5
- data/lib/flat_kit/input/file.rb +11 -9
- data/lib/flat_kit/input/io.rb +18 -21
- data/lib/flat_kit/input.rb +8 -7
- data/lib/flat_kit/internal_node.rb +22 -19
- data/lib/flat_kit/jsonl/format.rb +6 -2
- data/lib/flat_kit/jsonl/reader.rb +7 -4
- data/lib/flat_kit/jsonl/record.rb +16 -19
- data/lib/flat_kit/jsonl/writer.rb +25 -18
- data/lib/flat_kit/jsonl.rb +8 -4
- data/lib/flat_kit/leaf_node.rb +6 -5
- data/lib/flat_kit/log_formatter.rb +20 -0
- data/lib/flat_kit/logger.rb +12 -19
- data/lib/flat_kit/merge.rb +21 -16
- data/lib/flat_kit/merge_tree.rb +5 -6
- data/lib/flat_kit/output/file.rb +13 -9
- data/lib/flat_kit/output/io.rb +40 -35
- data/lib/flat_kit/output.rb +12 -7
- data/lib/flat_kit/position.rb +18 -0
- data/lib/flat_kit/reader.rb +8 -8
- data/lib/flat_kit/record.rb +12 -12
- data/lib/flat_kit/sentinel_internal_node.rb +6 -5
- data/lib/flat_kit/sentinel_leaf_node.rb +4 -1
- data/lib/flat_kit/sort.rb +8 -9
- data/lib/flat_kit/stat_type/nominal_stats.rb +64 -0
- data/lib/flat_kit/stat_type/numerical_stats.rb +120 -0
- data/lib/flat_kit/stat_type/ordinal_stats.rb +37 -0
- data/lib/flat_kit/stat_type.rb +70 -0
- data/lib/flat_kit/stats.rb +64 -0
- data/lib/flat_kit/writer.rb +17 -3
- data/lib/flat_kit/xsv/format.rb +6 -2
- data/lib/flat_kit/xsv/reader.rb +8 -6
- data/lib/flat_kit/xsv/record.rb +21 -15
- data/lib/flat_kit/xsv/writer.rb +36 -18
- data/lib/flat_kit/xsv.rb +7 -4
- data/lib/flat_kit.rb +33 -21
- metadata +38 -113
- data/Rakefile +0 -20
- data/tasks/default.rake +0 -242
- data/tasks/extension.rake +0 -38
- data/tasks/man.rake +0 -7
- data/tasks/this.rb +0 -208
- data/test/device_dataset.rb +0 -117
- data/test/input/test_file.rb +0 -73
- data/test/input/test_io.rb +0 -93
- data/test/jsonl/test_format.rb +0 -22
- data/test/jsonl/test_reader.rb +0 -49
- data/test/jsonl/test_record.rb +0 -61
- data/test/jsonl/test_writer.rb +0 -68
- data/test/output/test_file.rb +0 -60
- data/test/output/test_io.rb +0 -104
- data/test/test_conversions.rb +0 -45
- data/test/test_event_emitter.rb +0 -72
- data/test/test_format.rb +0 -24
- data/test/test_helper.rb +0 -26
- data/test/test_merge.rb +0 -40
- data/test/test_merge_tree.rb +0 -64
- data/test/test_version.rb +0 -11
- data/test/xsv/test_format.rb +0 -22
- data/test/xsv/test_reader.rb +0 -61
- data/test/xsv/test_record.rb +0 -69
- data/test/xsv/test_writer.rb +0 -68
data/tasks/extension.rake
DELETED
@@ -1,38 +0,0 @@
|
|
1
|
-
# To be used if the gem has extensions.
|
2
|
-
# If this task set is included then you will need to also have
|
3
|
-
#
|
4
|
-
# spec.add_development_dependency( 'rake-compiler', '~> 0.8.1' )
|
5
|
-
#
|
6
|
-
# in your top level rakefile
|
7
|
-
begin
|
8
|
-
require 'rake/extensiontask'
|
9
|
-
require 'rake/javaextensiontask'
|
10
|
-
|
11
|
-
if RUBY_PLATFORM == "java" then
|
12
|
-
|
13
|
-
Rake::JavaExtensionTask.new( This.name) do |ext|
|
14
|
-
ext.ext_dir = File.join( 'ext', This.name, "java" )
|
15
|
-
ext.lib_dir = File.join( 'lib', This.name )
|
16
|
-
ext.gem_spec = This.java_gemspec
|
17
|
-
end
|
18
|
-
|
19
|
-
else
|
20
|
-
|
21
|
-
Rake::ExtensionTask.new( This.name ) do |ext|
|
22
|
-
ext.ext_dir = File.join( 'ext', This.name, "c" )
|
23
|
-
ext.lib_dir = File.join( 'lib', This.name )
|
24
|
-
ext.gem_spec = This.ruby_gemspec
|
25
|
-
|
26
|
-
ext.cross_compile = true # enable cross compilation (requires cross compile toolchain)
|
27
|
-
ext.cross_platform = %w[x86-mingw32 x64-mingw32] # forces the Windows platform instead of the default one
|
28
|
-
# configure options only for cross compile
|
29
|
-
end
|
30
|
-
end
|
31
|
-
|
32
|
-
task :test_requirements => :compile
|
33
|
-
rescue LoadError
|
34
|
-
This.task_warning( 'extension' )
|
35
|
-
end
|
36
|
-
|
37
|
-
CLOBBER << FileList["lib/**/*.{jar,so,bundle}"]
|
38
|
-
CLOBBER << FileList["lib/#{This.name}/{1,2}.*/"]
|
data/tasks/man.rake
DELETED
@@ -1,7 +0,0 @@
|
|
1
|
-
#------------------------------------------------------------------------------
|
2
|
-
# man pages
|
3
|
-
#------------------------------------------------------------------------------
|
4
|
-
desc "Create the man pages"
|
5
|
-
task :man do
|
6
|
-
sh "ronn --roff #{FileList["man/*.ronn"]}"
|
7
|
-
end
|
data/tasks/this.rb
DELETED
@@ -1,208 +0,0 @@
|
|
1
|
-
require 'pathname'
|
2
|
-
|
3
|
-
# Public: A Class containing all the metadata and utilities needed to manage a
|
4
|
-
# ruby project.
|
5
|
-
class ThisProject
|
6
|
-
# The name of this project
|
7
|
-
attr_accessor :name
|
8
|
-
|
9
|
-
# The author's name
|
10
|
-
attr_accessor :author
|
11
|
-
|
12
|
-
# The email address of the author(s)
|
13
|
-
attr_accessor :email
|
14
|
-
|
15
|
-
# The homepage of this project
|
16
|
-
attr_accessor :homepage
|
17
|
-
|
18
|
-
# The regex of files to exclude from the manifest
|
19
|
-
attr_accessor :exclude_from_manifest
|
20
|
-
|
21
|
-
# The hash of Gem::Specifications keyed by platform
|
22
|
-
attr_accessor :gemspecs
|
23
|
-
|
24
|
-
# Public: Initialize ThisProject
|
25
|
-
#
|
26
|
-
# Yields self
|
27
|
-
def initialize(&block)
|
28
|
-
@exclude_from_manifest = Regexp.union(/\.(git|DS_Store)/,
|
29
|
-
/^(doc|coverage|pkg|tmp|Gemfile(\.lock)?)/,
|
30
|
-
/^[^\/]+\.gemspec/,
|
31
|
-
/\.(swp|jar|bundle|so|rvmrc|travis.yml|byebug_history|fossa.yml|ruby-version)$/,
|
32
|
-
/~$/)
|
33
|
-
@gemspecs = Hash.new
|
34
|
-
yield self if block_given?
|
35
|
-
end
|
36
|
-
|
37
|
-
# Public: return the version of ThisProject
|
38
|
-
#
|
39
|
-
# Search the ruby files in the project looking for the one that has the
|
40
|
-
# version string in it. This does not eval any code in the project, it parses
|
41
|
-
# the source code looking for the string.
|
42
|
-
#
|
43
|
-
# Returns a String version
|
44
|
-
def version
|
45
|
-
[ "lib/#{ name }.rb", "lib/#{ name }/version.rb" ].each do |v|
|
46
|
-
path = project_path( v )
|
47
|
-
line = path.read[/^\s*VERSION\s*=\s*.*/]
|
48
|
-
if line then
|
49
|
-
return line.match(/.*VERSION\s*=\s*['"](.*)['"]/)[1]
|
50
|
-
end
|
51
|
-
end
|
52
|
-
end
|
53
|
-
|
54
|
-
# Internal: Return a section of an RDoc file with the given section name
|
55
|
-
#
|
56
|
-
# file - the relative path in the project of the file to parse
|
57
|
-
# section_name - the section out of the file from which to parse data
|
58
|
-
#
|
59
|
-
# Returns the text of the section as an array of paragraphs.
|
60
|
-
def section_of( file, section_name )
|
61
|
-
re = /^[=#]+ (.*)$/
|
62
|
-
sectional = project_path( file )
|
63
|
-
parts = sectional.read.split( re )[1..-1]
|
64
|
-
parts.map! { |p| p.strip }
|
65
|
-
|
66
|
-
sections = Hash.new
|
67
|
-
Hash[*parts].each do |k,v|
|
68
|
-
sections[k] = v.split("\n\n")
|
69
|
-
end
|
70
|
-
return sections[section_name]
|
71
|
-
end
|
72
|
-
|
73
|
-
# Internal: print out a warning about the given task
|
74
|
-
def task_warning( task )
|
75
|
-
warn "WARNING: '#{task}' tasks are not defined. Please run 'rake develop'"
|
76
|
-
end
|
77
|
-
|
78
|
-
# Internal: Return the full path to the file that is relative to the project
|
79
|
-
# root.
|
80
|
-
#
|
81
|
-
# path - the relative path of the file from the project root
|
82
|
-
#
|
83
|
-
# Returns the Pathname of the file
|
84
|
-
def project_path( *relative_path )
|
85
|
-
project_root.join( *relative_path )
|
86
|
-
end
|
87
|
-
|
88
|
-
# Internal: The absolute path of this file
|
89
|
-
#
|
90
|
-
# Returns the Pathname of this file.
|
91
|
-
def this_file_path
|
92
|
-
Pathname.new( __FILE__ ).expand_path
|
93
|
-
end
|
94
|
-
|
95
|
-
# Internal: The root directory of this project
|
96
|
-
#
|
97
|
-
# This is defined as being the directory that is in the path of this project
|
98
|
-
# that has the first Rakefile
|
99
|
-
#
|
100
|
-
# Returns the Pathname of the directory
|
101
|
-
def project_root
|
102
|
-
this_file_path.ascend do |p|
|
103
|
-
rakefile = p.join( 'Rakefile' )
|
104
|
-
return p if rakefile.exist?
|
105
|
-
end
|
106
|
-
end
|
107
|
-
|
108
|
-
# Internal: Returns the contents of the Manifest.txt file as an array
|
109
|
-
#
|
110
|
-
# Returns an Array of strings
|
111
|
-
def manifest
|
112
|
-
manifest_file = project_path( "Manifest.txt" )
|
113
|
-
abort "You need a Manifest.txt" unless manifest_file.readable?
|
114
|
-
manifest_file.readlines.map { |l| l.strip }
|
115
|
-
end
|
116
|
-
|
117
|
-
# Internal: Return the files that define the extensions
|
118
|
-
#
|
119
|
-
# Returns an Array
|
120
|
-
def extension_conf_files
|
121
|
-
manifest.grep( /extconf.rb\Z/ )
|
122
|
-
end
|
123
|
-
|
124
|
-
# Internal: Returns the gemspec associated with the current ruby platform
|
125
|
-
def platform_gemspec
|
126
|
-
gemspecs.fetch(platform) { This.ruby_gemspec }
|
127
|
-
end
|
128
|
-
|
129
|
-
def core_gemspec
|
130
|
-
Gem::Specification.new do |spec|
|
131
|
-
spec.name = name
|
132
|
-
spec.version = version
|
133
|
-
spec.author = author
|
134
|
-
spec.email = email
|
135
|
-
spec.homepage = homepage
|
136
|
-
|
137
|
-
spec.summary = summary
|
138
|
-
spec.description = description
|
139
|
-
spec.license = license
|
140
|
-
|
141
|
-
spec.files = manifest
|
142
|
-
spec.executables = spec.files.grep(/^bin/) { |f| File.basename(f) }
|
143
|
-
spec.test_files = spec.files.grep(/^(spec|test)/)
|
144
|
-
|
145
|
-
spec.extra_rdoc_files += spec.files.grep(/(txt|rdoc|md)$/)
|
146
|
-
spec.rdoc_options = [ "--main" , 'README.md',
|
147
|
-
"--markup", "tomdoc" ]
|
148
|
-
|
149
|
-
spec.required_ruby_version = '>= 2.2.2'
|
150
|
-
end
|
151
|
-
end
|
152
|
-
|
153
|
-
# Internal: Return the gemspec for the ruby platform
|
154
|
-
def ruby_gemspec( core = core_gemspec, &block )
|
155
|
-
yielding_gemspec( 'ruby', core, &block )
|
156
|
-
end
|
157
|
-
|
158
|
-
# Internal: Return the gemspec for the jruby platform
|
159
|
-
def java_gemspec( core = core_gemspec, &block )
|
160
|
-
yielding_gemspec( 'java', core, &block )
|
161
|
-
end
|
162
|
-
|
163
|
-
# Internal: give an initial spec and a key, create a new gemspec based off of
|
164
|
-
# it.
|
165
|
-
#
|
166
|
-
# This will force the new gemspecs 'platform' to be that of the key, since the
|
167
|
-
# only reason you would have multiple gemspecs at this point is to deal with
|
168
|
-
# different platforms.
|
169
|
-
def yielding_gemspec( key, core )
|
170
|
-
spec = gemspecs[key] ||= core.dup
|
171
|
-
spec.platform = key
|
172
|
-
yield spec if block_given?
|
173
|
-
return spec
|
174
|
-
end
|
175
|
-
|
176
|
-
# Internal: Return the platform of ThisProject at the current moment in time.
|
177
|
-
def platform
|
178
|
-
(RUBY_PLATFORM == "java") ? 'java' : Gem::Platform::RUBY
|
179
|
-
end
|
180
|
-
|
181
|
-
# Internal: Return the DESCRIPTION section of the README.md file
|
182
|
-
def description_section
|
183
|
-
section_of( 'README.md', 'DESCRIPTION')
|
184
|
-
end
|
185
|
-
|
186
|
-
# Internal: Return the summary text from the README
|
187
|
-
def summary
|
188
|
-
description_section.first
|
189
|
-
end
|
190
|
-
|
191
|
-
# Internal: Return the full description text from the README
|
192
|
-
def description
|
193
|
-
description_section.join(" ").tr("\n", ' ').gsub(/[{}]/,'').gsub(/\[[^\]]+\]/,'') # strip rdoc
|
194
|
-
end
|
195
|
-
|
196
|
-
def license
|
197
|
-
license_file = project_path("LICENSE.txt")
|
198
|
-
line = license_file.readlines.first
|
199
|
-
line.split(/\s+/).first
|
200
|
-
end
|
201
|
-
|
202
|
-
# Internal: The path to the gemspec file
|
203
|
-
def gemspec_file
|
204
|
-
project_path( "#{ name }.gemspec" )
|
205
|
-
end
|
206
|
-
end
|
207
|
-
|
208
|
-
This = ThisProject.new
|
data/test/device_dataset.rb
DELETED
@@ -1,117 +0,0 @@
|
|
1
|
-
class DeviceDataset
|
2
|
-
|
3
|
-
include TestHelper
|
4
|
-
|
5
|
-
attr_reader :count
|
6
|
-
attr_reader :compare_fields
|
7
|
-
attr_reader :fields
|
8
|
-
|
9
|
-
attr_reader :filename_jsonl
|
10
|
-
attr_reader :filename_sorted_jsonl
|
11
|
-
attr_reader :filename_csv
|
12
|
-
attr_reader :filename_sorted_csv
|
13
|
-
|
14
|
-
def initialize(count:, compare_fields: [ "manufacturer", "model_name", "slug" ])
|
15
|
-
@count = count
|
16
|
-
@compare_fields = compare_fields
|
17
|
-
@fields = %w[
|
18
|
-
build_number
|
19
|
-
manufacturer
|
20
|
-
model_name
|
21
|
-
platform
|
22
|
-
serial
|
23
|
-
slug
|
24
|
-
]
|
25
|
-
@filename_sorted_jsonl = nil
|
26
|
-
@filename_jsonl = nil
|
27
|
-
@filename_sorted_csv = nil
|
28
|
-
@filename_csv = nil
|
29
|
-
@slug = generate_slug
|
30
|
-
end
|
31
|
-
|
32
|
-
def persist_records_as_jsonl
|
33
|
-
@filename_jsonl = scratch_file(prefix: "unsorted_", slug: @slug)
|
34
|
-
@filename_jsonl.open("w+") do |f|
|
35
|
-
f.write(records_as_jsonl)
|
36
|
-
end
|
37
|
-
end
|
38
|
-
|
39
|
-
def persist_sorted_records_as_jsonl
|
40
|
-
@filename_sorted_jsonl = scratch_file(prefix: "sorted_", slug: @slug)
|
41
|
-
@filename_sorted_jsonl.open("w+") do |f|
|
42
|
-
f.write(sorted_records_as_jsonl)
|
43
|
-
end
|
44
|
-
end
|
45
|
-
|
46
|
-
def cleanup_files
|
47
|
-
[ @filename_sorted_jsonl, @filename_jsonl, @filename_sorted_csv, @filename_csv ].each do |p|
|
48
|
-
next if p.nil?
|
49
|
-
p.unlink if p.exist?
|
50
|
-
end
|
51
|
-
end
|
52
|
-
|
53
|
-
def records
|
54
|
-
@records ||= Array.new.tap do |a|
|
55
|
-
count.times do
|
56
|
-
a << Hash.new.tap do |h|
|
57
|
-
fields.each do |f|
|
58
|
-
value = (f == 'slug') ? generate_slug : ::Faker::Device.send(f)
|
59
|
-
h[f] = value
|
60
|
-
end
|
61
|
-
end
|
62
|
-
end
|
63
|
-
end
|
64
|
-
end
|
65
|
-
|
66
|
-
def sorted_records
|
67
|
-
@sorted_records ||= records.sort_by do |r|
|
68
|
-
compare_fields.map { |field| r[field] }
|
69
|
-
end
|
70
|
-
end
|
71
|
-
|
72
|
-
def records_as_jsonl
|
73
|
-
@jsonl_records ||= as_jsonl(list: records)
|
74
|
-
end
|
75
|
-
|
76
|
-
def records_as_csv
|
77
|
-
@csv_records ||= as_csv(list: records)
|
78
|
-
end
|
79
|
-
|
80
|
-
def records_as_csv_rows
|
81
|
-
@csv_rows ||= as_csv_rows(records_as_csv)
|
82
|
-
end
|
83
|
-
|
84
|
-
def sorted_records_as_jsonl
|
85
|
-
@jsonl_sorted_records ||= as_jsonl(list: sorted_records)
|
86
|
-
end
|
87
|
-
|
88
|
-
def sorted_records_as_csv
|
89
|
-
@csv_sorted_records ||= as_csv(list: sorted_records)
|
90
|
-
end
|
91
|
-
|
92
|
-
def sorted_records_as_csv_rows
|
93
|
-
@csv_sorted_rows ||= as_csv_rows(sorted_records_as_csv)
|
94
|
-
end
|
95
|
-
|
96
|
-
private
|
97
|
-
|
98
|
-
def as_jsonl(list:)
|
99
|
-
list.map { |r| Oj.dump(r) }.join("\n") + "\n"
|
100
|
-
end
|
101
|
-
|
102
|
-
def as_csv(list:, headers: fields)
|
103
|
-
CSV.generate('', headers: headers , write_headers: true) do |csv|
|
104
|
-
list.each do |r|
|
105
|
-
csv << fields.map { |f| r[f] }
|
106
|
-
end
|
107
|
-
end
|
108
|
-
end
|
109
|
-
|
110
|
-
def as_csv_rows(text)
|
111
|
-
Array.new.tap do |a|
|
112
|
-
CSV.new(text, converters: :numeric, headers: :first_row, return_headers: false).each do |row|
|
113
|
-
a << row
|
114
|
-
end
|
115
|
-
end
|
116
|
-
end
|
117
|
-
end
|
data/test/input/test_file.rb
DELETED
@@ -1,73 +0,0 @@
|
|
1
|
-
require_relative '../test_helper'
|
2
|
-
|
3
|
-
module TestInput
|
4
|
-
class TestFile < ::Minitest::Test
|
5
|
-
def test_does_not_handle_stdin_text
|
6
|
-
::FlatKit::Input::IO::STDINS.each do |e|
|
7
|
-
refute(::FlatKit::Input::File.handles?(e), "#{e} is not stdin text")
|
8
|
-
end
|
9
|
-
end
|
10
|
-
|
11
|
-
def test_handles_existing_file
|
12
|
-
test_path = "tmp/test_handles_existing_file.txt"
|
13
|
-
begin
|
14
|
-
IO.write(test_path,"test handles existing file")
|
15
|
-
assert(::FlatKit::Input::File.handles?(test_path))
|
16
|
-
ensure
|
17
|
-
File.unlink(test_path) if File.exist?(test_path)
|
18
|
-
end
|
19
|
-
end
|
20
|
-
|
21
|
-
def test_only_handles_string
|
22
|
-
refute(::FlatKit::Input::File.handles?(Object.new))
|
23
|
-
end
|
24
|
-
|
25
|
-
def test_raises_error_if_not_readable
|
26
|
-
assert_raises(FlatKit::Error) { ::FlatKit::Input::File.new("tmp/does-not-exist") }
|
27
|
-
end
|
28
|
-
|
29
|
-
def test_init_from_path
|
30
|
-
test_path = "tmp/test_init_from_path.txt"
|
31
|
-
begin
|
32
|
-
IO.write(test_path,"nothing to see here")
|
33
|
-
io = ::FlatKit::Input::File.new(test_path)
|
34
|
-
assert_equal(test_path, io.name)
|
35
|
-
assert_instance_of(::File, io.io)
|
36
|
-
ensure
|
37
|
-
File.unlink(test_path) if File.exist?(test_path)
|
38
|
-
end
|
39
|
-
end
|
40
|
-
|
41
|
-
def test_reads_from_file
|
42
|
-
test_path = "tmp/test_reads_from_file.txt"
|
43
|
-
begin
|
44
|
-
text = "test_reads_from_file"
|
45
|
-
IO.write(test_path,text)
|
46
|
-
|
47
|
-
input = ::FlatKit::Input::File.new(test_path)
|
48
|
-
content = input.io.read
|
49
|
-
assert_equal(text, content)
|
50
|
-
|
51
|
-
input.close
|
52
|
-
ensure
|
53
|
-
File.unlink(test_path) if File.exist?(test_path)
|
54
|
-
end
|
55
|
-
end
|
56
|
-
|
57
|
-
def test_reads_from_gzfile
|
58
|
-
test_path = "tmp/test_reads_from_gzfile.txt.gz"
|
59
|
-
begin
|
60
|
-
text = "this is something to read"
|
61
|
-
system("echo '#{text}' | gzip > #{test_path}")
|
62
|
-
|
63
|
-
input = ::FlatKit::Input::File.new(test_path)
|
64
|
-
content = input.io.read
|
65
|
-
assert_equal(text + "\n", content)
|
66
|
-
|
67
|
-
input.close
|
68
|
-
ensure
|
69
|
-
File.unlink(test_path) if File.exist?(test_path)
|
70
|
-
end
|
71
|
-
end
|
72
|
-
end
|
73
|
-
end
|
data/test/input/test_io.rb
DELETED
@@ -1,93 +0,0 @@
|
|
1
|
-
require_relative '../test_helper'
|
2
|
-
|
3
|
-
module TestInput
|
4
|
-
class NullIO < ::IO
|
5
|
-
def initialize()
|
6
|
-
end
|
7
|
-
end
|
8
|
-
|
9
|
-
class TestIO < ::Minitest::Test
|
10
|
-
|
11
|
-
def test_handles_stdin_text
|
12
|
-
::FlatKit::Input::IO::STDINS.each do |e|
|
13
|
-
assert(::FlatKit::Input::IO.handles?(e), "#{e} is not stdin text")
|
14
|
-
end
|
15
|
-
end
|
16
|
-
|
17
|
-
def test_handles_stdin_io
|
18
|
-
x = $stdin
|
19
|
-
assert(::FlatKit::Input::IO.handles?(x), "is not stdin")
|
20
|
-
end
|
21
|
-
|
22
|
-
def test_handles_stringio
|
23
|
-
assert(::FlatKit::Input::IO.handles?(StringIO.new))
|
24
|
-
end
|
25
|
-
|
26
|
-
def test_does_not_handle_other
|
27
|
-
x = Object.new
|
28
|
-
refute(::FlatKit::Input::IO.handles?(x))
|
29
|
-
end
|
30
|
-
|
31
|
-
def test_init_from_dash
|
32
|
-
io = ::FlatKit::Input::IO.new("-")
|
33
|
-
assert_equal("<STDIN>", io.name)
|
34
|
-
assert_equal(::STDIN, io.io)
|
35
|
-
end
|
36
|
-
|
37
|
-
def test_init_from_file_object
|
38
|
-
test_path = "tmp/test_init_from_file_object.txt"
|
39
|
-
begin
|
40
|
-
File.open(test_path, "w+") do |f|
|
41
|
-
io = ::FlatKit::Input::IO.new(f)
|
42
|
-
assert_equal(test_path, io.name)
|
43
|
-
assert_instance_of(::File, io.io)
|
44
|
-
end
|
45
|
-
ensure
|
46
|
-
File.unlink(test_path) if File.exist?(test_path)
|
47
|
-
end
|
48
|
-
end
|
49
|
-
|
50
|
-
def test_init_from_string_io_object
|
51
|
-
sio = StringIO.new
|
52
|
-
io = ::FlatKit::Input::IO.new(sio)
|
53
|
-
assert_match(/StringIO/, io.name)
|
54
|
-
assert_instance_of(::StringIO, io.io)
|
55
|
-
end
|
56
|
-
|
57
|
-
def test_init_from_io_object
|
58
|
-
null_io = NullIO.new
|
59
|
-
io = ::FlatKit::Input::IO.new(null_io)
|
60
|
-
assert_match(/NullIO/, io.name)
|
61
|
-
assert_instance_of(::TestInput::NullIO, io.io)
|
62
|
-
end
|
63
|
-
|
64
|
-
def test_init_from_stdin
|
65
|
-
io = ::FlatKit::Input::IO.new($stdin)
|
66
|
-
assert_equal("<STDIN>", io.name)
|
67
|
-
assert_equal(::STDIN, io.io)
|
68
|
-
end
|
69
|
-
|
70
|
-
def test_init_from_invalid
|
71
|
-
assert_raises(::FlatKit::Error) { ::FlatKit::Input::IO.new(Object.new) }
|
72
|
-
end
|
73
|
-
|
74
|
-
def test_reads_from_io
|
75
|
-
test_path = "tmp/test_reads_from_io.txt"
|
76
|
-
begin
|
77
|
-
line = "This is a line to read"
|
78
|
-
File.open(test_path, "w+") do |f|
|
79
|
-
f.write(line)
|
80
|
-
f.rewind
|
81
|
-
|
82
|
-
io = ::FlatKit::Input::IO.new(f)
|
83
|
-
content = io.io.read
|
84
|
-
io.close
|
85
|
-
assert_equal(content, line)
|
86
|
-
end
|
87
|
-
ensure
|
88
|
-
File.unlink(test_path) if File.exist?(test_path)
|
89
|
-
end
|
90
|
-
end
|
91
|
-
|
92
|
-
end
|
93
|
-
end
|
data/test/jsonl/test_format.rb
DELETED
@@ -1,22 +0,0 @@
|
|
1
|
-
require_relative '../test_helper'
|
2
|
-
|
3
|
-
module TestJsonl
|
4
|
-
class TestFormat < ::Minitest::Test
|
5
|
-
|
6
|
-
def test_handles_json
|
7
|
-
assert(::FlatKit::Jsonl::Format.handles?("data.json.gz"))
|
8
|
-
end
|
9
|
-
|
10
|
-
def test_handles_jsonl
|
11
|
-
assert(::FlatKit::Jsonl::Format.handles?("data.jsonl"))
|
12
|
-
end
|
13
|
-
|
14
|
-
def test_handles_ndjson
|
15
|
-
assert(::FlatKit::Jsonl::Format.handles?("log.ndjson"))
|
16
|
-
end
|
17
|
-
|
18
|
-
def test_does_not_handle_csv
|
19
|
-
refute(::FlatKit::Jsonl::Format.handles?("data.csv"))
|
20
|
-
end
|
21
|
-
end
|
22
|
-
end
|
data/test/jsonl/test_reader.rb
DELETED
@@ -1,49 +0,0 @@
|
|
1
|
-
require_relative '../test_helper'
|
2
|
-
|
3
|
-
module TestJsonl
|
4
|
-
class TestReader < ::Minitest::Test
|
5
|
-
def setup
|
6
|
-
@count = 20
|
7
|
-
@dataset = DeviceDataset.new(count: @count)
|
8
|
-
@compare_fields = @dataset.compare_fields
|
9
|
-
@test_path = "tmp/test_reads_from_io.jsonl"
|
10
|
-
|
11
|
-
File.open(@test_path, "wb") do |f|
|
12
|
-
f.write(@dataset.records_as_jsonl)
|
13
|
-
end
|
14
|
-
end
|
15
|
-
|
16
|
-
def teardown
|
17
|
-
File.unlink(@test_path) if File.exist?(@test_path)
|
18
|
-
end
|
19
|
-
|
20
|
-
def test_raises_error_on_invalid_source
|
21
|
-
assert_raises(::FlatKit::Error) {
|
22
|
-
::FlatKit::Jsonl::Reader.new(source: Object.new, compare_fields: nil)
|
23
|
-
}
|
24
|
-
end
|
25
|
-
|
26
|
-
def test_reads_from_pathname
|
27
|
-
reader = ::FlatKit::Jsonl::Reader.new(source: @test_path, compare_fields: @compare_fields)
|
28
|
-
all = reader.to_a
|
29
|
-
assert_equal(@count, reader.count)
|
30
|
-
assert_equal(@count, all.size)
|
31
|
-
end
|
32
|
-
|
33
|
-
def test_reads_from_io
|
34
|
-
File.open(@test_path) do |f|
|
35
|
-
reader = ::FlatKit::Jsonl::Reader.new(source: f, compare_fields: @compare_fields)
|
36
|
-
all = reader.to_a
|
37
|
-
assert_equal(@count, reader.count)
|
38
|
-
assert_equal(@count, all.size)
|
39
|
-
end
|
40
|
-
end
|
41
|
-
|
42
|
-
def test_raises_error_on_io_error
|
43
|
-
s = StringIO.new
|
44
|
-
s.close_read
|
45
|
-
reader = ::FlatKit::Jsonl::Reader.new(source: s, compare_fields: @compare_fields)
|
46
|
-
assert_raises(::FlatKit::Error) { reader.to_a }
|
47
|
-
end
|
48
|
-
end
|
49
|
-
end
|
data/test/jsonl/test_record.rb
DELETED
@@ -1,61 +0,0 @@
|
|
1
|
-
require_relative '../test_helper'
|
2
|
-
require 'faker'
|
3
|
-
require 'byebug'
|
4
|
-
|
5
|
-
module TestJsonl
|
6
|
-
class TestRecord < ::Minitest::Test
|
7
|
-
def setup
|
8
|
-
@one_row_dataset = DeviceDataset.new(count: 1)
|
9
|
-
@src_record = @one_row_dataset.records.first
|
10
|
-
@src_json = JSON.generate(@src_record)
|
11
|
-
@compare_fields = @one_row_dataset.compare_fields
|
12
|
-
end
|
13
|
-
|
14
|
-
def test_initializes_from_data
|
15
|
-
record = FlatKit::Jsonl::Record.new(data: @src_json, compare_fields: @compare_fields)
|
16
|
-
@compare_fields.each do |k|
|
17
|
-
assert_equal(@src_record[k], record[k])
|
18
|
-
end
|
19
|
-
end
|
20
|
-
|
21
|
-
def test_ignores_non_compare_fields_values
|
22
|
-
record = FlatKit::Jsonl::Record.new(data: @src_json, compare_fields: @compare_fields)
|
23
|
-
|
24
|
-
refute(record["version"])
|
25
|
-
end
|
26
|
-
|
27
|
-
def test_is_sortable
|
28
|
-
dataset = DeviceDataset.new(count: 20)
|
29
|
-
fk_records = Array.new.tap do |a|
|
30
|
-
dataset.records.each do |r|
|
31
|
-
data = JSON.generate(r)
|
32
|
-
record = FlatKit::Jsonl::Record.new(data: data, compare_fields: @compare_fields)
|
33
|
-
a << record
|
34
|
-
end
|
35
|
-
end
|
36
|
-
|
37
|
-
sorted = fk_records.sort
|
38
|
-
|
39
|
-
sio = StringIO.new
|
40
|
-
sorted.each do |r|
|
41
|
-
sio.puts(r.to_s)
|
42
|
-
end
|
43
|
-
|
44
|
-
sorted_string = sio.string
|
45
|
-
assert_equal(dataset.sorted_records_as_jsonl, sorted_string)
|
46
|
-
end
|
47
|
-
|
48
|
-
def test_converts_to_hash
|
49
|
-
record = FlatKit::Jsonl::Record.new(data: @src_json, compare_fields: @compare_fields)
|
50
|
-
h = record.to_hash
|
51
|
-
|
52
|
-
assert_equal(@src_record, h)
|
53
|
-
end
|
54
|
-
|
55
|
-
def test_converts_from_record
|
56
|
-
rec1 = FlatKit::Jsonl::Record.new(data: @src_json, compare_fields: @compare_fields)
|
57
|
-
rec2 = FlatKit::Jsonl::Record.from_record(rec1)
|
58
|
-
assert_equal(rec1, rec2)
|
59
|
-
end
|
60
|
-
end
|
61
|
-
end
|