flat_kit 0.2.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CONTRIBUTING.md +1 -2
- data/HISTORY.md +15 -0
- data/Manifest.txt +21 -26
- data/{bin → exe}/fk +2 -1
- data/flat_kit.gemspec +33 -0
- data/lib/flat_kit/cli.rb +48 -23
- data/lib/flat_kit/command/cat.rb +34 -32
- data/lib/flat_kit/command/merge.rb +37 -36
- data/lib/flat_kit/command/sort.rb +37 -37
- data/lib/flat_kit/command/stats.rb +96 -0
- data/lib/flat_kit/command.rb +10 -10
- data/lib/flat_kit/descendant_tracker.rb +17 -5
- data/lib/flat_kit/error.rb +4 -0
- data/lib/flat_kit/event_emitter.rb +7 -4
- data/lib/flat_kit/field_stats.rb +246 -0
- data/lib/flat_kit/field_type/boolean_type.rb +52 -0
- data/lib/flat_kit/field_type/date_type.rb +181 -0
- data/lib/flat_kit/field_type/float_type.rb +43 -0
- data/lib/flat_kit/field_type/guess_type.rb +23 -0
- data/lib/flat_kit/field_type/integer_type.rb +36 -0
- data/lib/flat_kit/field_type/null_type.rb +39 -0
- data/lib/flat_kit/field_type/string_type.rb +24 -0
- data/lib/flat_kit/field_type/timestamp_type.rb +48 -0
- data/lib/flat_kit/field_type/unknown_type.rb +30 -0
- data/lib/flat_kit/field_type.rb +83 -0
- data/lib/flat_kit/format.rb +11 -5
- data/lib/flat_kit/input/file.rb +11 -9
- data/lib/flat_kit/input/io.rb +18 -21
- data/lib/flat_kit/input.rb +8 -7
- data/lib/flat_kit/internal_node.rb +22 -19
- data/lib/flat_kit/jsonl/format.rb +6 -2
- data/lib/flat_kit/jsonl/reader.rb +7 -4
- data/lib/flat_kit/jsonl/record.rb +16 -19
- data/lib/flat_kit/jsonl/writer.rb +25 -18
- data/lib/flat_kit/jsonl.rb +8 -4
- data/lib/flat_kit/leaf_node.rb +6 -5
- data/lib/flat_kit/log_formatter.rb +20 -0
- data/lib/flat_kit/logger.rb +12 -19
- data/lib/flat_kit/merge.rb +21 -16
- data/lib/flat_kit/merge_tree.rb +5 -6
- data/lib/flat_kit/output/file.rb +13 -9
- data/lib/flat_kit/output/io.rb +40 -35
- data/lib/flat_kit/output.rb +12 -7
- data/lib/flat_kit/position.rb +18 -0
- data/lib/flat_kit/reader.rb +8 -8
- data/lib/flat_kit/record.rb +12 -12
- data/lib/flat_kit/sentinel_internal_node.rb +6 -5
- data/lib/flat_kit/sentinel_leaf_node.rb +4 -1
- data/lib/flat_kit/sort.rb +8 -9
- data/lib/flat_kit/stat_type/nominal_stats.rb +64 -0
- data/lib/flat_kit/stat_type/numerical_stats.rb +120 -0
- data/lib/flat_kit/stat_type/ordinal_stats.rb +37 -0
- data/lib/flat_kit/stat_type.rb +70 -0
- data/lib/flat_kit/stats.rb +64 -0
- data/lib/flat_kit/writer.rb +17 -3
- data/lib/flat_kit/xsv/format.rb +6 -2
- data/lib/flat_kit/xsv/reader.rb +8 -6
- data/lib/flat_kit/xsv/record.rb +21 -15
- data/lib/flat_kit/xsv/writer.rb +36 -18
- data/lib/flat_kit/xsv.rb +7 -4
- data/lib/flat_kit.rb +33 -21
- metadata +38 -113
- data/Rakefile +0 -20
- data/tasks/default.rake +0 -242
- data/tasks/extension.rake +0 -38
- data/tasks/man.rake +0 -7
- data/tasks/this.rb +0 -208
- data/test/device_dataset.rb +0 -117
- data/test/input/test_file.rb +0 -73
- data/test/input/test_io.rb +0 -93
- data/test/jsonl/test_format.rb +0 -22
- data/test/jsonl/test_reader.rb +0 -49
- data/test/jsonl/test_record.rb +0 -61
- data/test/jsonl/test_writer.rb +0 -68
- data/test/output/test_file.rb +0 -60
- data/test/output/test_io.rb +0 -104
- data/test/test_conversions.rb +0 -45
- data/test/test_event_emitter.rb +0 -72
- data/test/test_format.rb +0 -24
- data/test/test_helper.rb +0 -26
- data/test/test_merge.rb +0 -40
- data/test/test_merge_tree.rb +0 -64
- data/test/test_version.rb +0 -11
- data/test/xsv/test_format.rb +0 -22
- data/test/xsv/test_reader.rb +0 -61
- data/test/xsv/test_record.rb +0 -69
- data/test/xsv/test_writer.rb +0 -68
data/tasks/extension.rake
DELETED
@@ -1,38 +0,0 @@
|
|
1
|
-
# To be used if the gem has extensions.
|
2
|
-
# If this task set is included then you will also need to have
|
3
|
-
#
|
4
|
-
# spec.add_development_dependency( 'rake-compiler', '~> 0.8.1' )
|
5
|
-
#
|
6
|
-
# in your top level rakefile
|
7
|
-
begin
|
8
|
-
require 'rake/extensiontask'
|
9
|
-
require 'rake/javaextensiontask'
|
10
|
-
|
11
|
-
if RUBY_PLATFORM == "java" then
|
12
|
-
|
13
|
-
Rake::JavaExtensionTask.new( This.name) do |ext|
|
14
|
-
ext.ext_dir = File.join( 'ext', This.name, "java" )
|
15
|
-
ext.lib_dir = File.join( 'lib', This.name )
|
16
|
-
ext.gem_spec = This.java_gemspec
|
17
|
-
end
|
18
|
-
|
19
|
-
else
|
20
|
-
|
21
|
-
Rake::ExtensionTask.new( This.name ) do |ext|
|
22
|
-
ext.ext_dir = File.join( 'ext', This.name, "c" )
|
23
|
-
ext.lib_dir = File.join( 'lib', This.name )
|
24
|
-
ext.gem_spec = This.ruby_gemspec
|
25
|
-
|
26
|
-
ext.cross_compile = true # enable cross compilation (requires cross compile toolchain)
|
27
|
-
ext.cross_platform = %w[x86-mingw32 x64-mingw32] # forces the Windows platform instead of the default one
|
28
|
-
# configure options only for cross compile
|
29
|
-
end
|
30
|
-
end
|
31
|
-
|
32
|
-
task :test_requirements => :compile
|
33
|
-
rescue LoadError
|
34
|
-
This.task_warning( 'extension' )
|
35
|
-
end
|
36
|
-
|
37
|
-
CLOBBER << FileList["lib/**/*.{jar,so,bundle}"]
|
38
|
-
CLOBBER << FileList["lib/#{This.name}/{1,2}.*/"]
|
data/tasks/man.rake
DELETED
@@ -1,7 +0,0 @@
|
|
1
|
-
#------------------------------------------------------------------------------
|
2
|
-
# man pages
|
3
|
-
#------------------------------------------------------------------------------
|
4
|
-
desc "Create the man pages"
|
5
|
-
task :man do
|
6
|
-
sh "ronn --roff #{FileList["man/*.ronn"]}"
|
7
|
-
end
|
data/tasks/this.rb
DELETED
@@ -1,208 +0,0 @@
|
|
1
|
-
require 'pathname'
|
2
|
-
|
3
|
-
# Public: A Class containing all the metadata and utilities needed to manage a
|
4
|
-
# ruby project.
|
5
|
-
class ThisProject
|
6
|
-
# The name of this project
|
7
|
-
attr_accessor :name
|
8
|
-
|
9
|
-
# The author's name
|
10
|
-
attr_accessor :author
|
11
|
-
|
12
|
-
# The email address of the author(s)
|
13
|
-
attr_accessor :email
|
14
|
-
|
15
|
-
# The homepage of this project
|
16
|
-
attr_accessor :homepage
|
17
|
-
|
18
|
-
# The regex of files to exclude from the manifest
|
19
|
-
attr_accessor :exclude_from_manifest
|
20
|
-
|
21
|
-
# The hash of Gem::Specifications keyed by platform
|
22
|
-
attr_accessor :gemspecs
|
23
|
-
|
24
|
-
# Public: Initialize ThisProject
|
25
|
-
#
|
26
|
-
# Yields self
|
27
|
-
def initialize(&block)
|
28
|
-
@exclude_from_manifest = Regexp.union(/\.(git|DS_Store)/,
|
29
|
-
/^(doc|coverage|pkg|tmp|Gemfile(\.lock)?)/,
|
30
|
-
/^[^\/]+\.gemspec/,
|
31
|
-
/\.(swp|jar|bundle|so|rvmrc|travis.yml|byebug_history|fossa.yml|ruby-version)$/,
|
32
|
-
/~$/)
|
33
|
-
@gemspecs = Hash.new
|
34
|
-
yield self if block_given?
|
35
|
-
end
|
36
|
-
|
37
|
-
# Public: return the version of ThisProject
|
38
|
-
#
|
39
|
-
# Search the ruby files in the project looking for the one that has the
|
40
|
-
# version string in it. This does not eval any code in the project, it parses
|
41
|
-
# the source code looking for the string.
|
42
|
-
#
|
43
|
-
# Returns a String version
|
44
|
-
def version
|
45
|
-
[ "lib/#{ name }.rb", "lib/#{ name }/version.rb" ].each do |v|
|
46
|
-
path = project_path( v )
|
47
|
-
line = path.read[/^\s*VERSION\s*=\s*.*/]
|
48
|
-
if line then
|
49
|
-
return line.match(/.*VERSION\s*=\s*['"](.*)['"]/)[1]
|
50
|
-
end
|
51
|
-
end
|
52
|
-
end
|
53
|
-
|
54
|
-
# Internal: Return a section of an RDoc file with the given section name
|
55
|
-
#
|
56
|
-
# file - the relative path in the project of the file to parse
|
57
|
-
# section_name - the section out of the file from which to parse data
|
58
|
-
#
|
59
|
-
# Returns the text of the section as an array of paragraphs.
|
60
|
-
def section_of( file, section_name )
|
61
|
-
re = /^[=#]+ (.*)$/
|
62
|
-
sectional = project_path( file )
|
63
|
-
parts = sectional.read.split( re )[1..-1]
|
64
|
-
parts.map! { |p| p.strip }
|
65
|
-
|
66
|
-
sections = Hash.new
|
67
|
-
Hash[*parts].each do |k,v|
|
68
|
-
sections[k] = v.split("\n\n")
|
69
|
-
end
|
70
|
-
return sections[section_name]
|
71
|
-
end
|
72
|
-
|
73
|
-
# Internal: print out a warning about the given task
|
74
|
-
def task_warning( task )
|
75
|
-
warn "WARNING: '#{task}' tasks are not defined. Please run 'rake develop'"
|
76
|
-
end
|
77
|
-
|
78
|
-
# Internal: Return the full path to the file that is relative to the project
|
79
|
-
# root.
|
80
|
-
#
|
81
|
-
# path - the relative path of the file from the project root
|
82
|
-
#
|
83
|
-
# Returns the Pathname of the file
|
84
|
-
def project_path( *relative_path )
|
85
|
-
project_root.join( *relative_path )
|
86
|
-
end
|
87
|
-
|
88
|
-
# Internal: The absolute path of this file
|
89
|
-
#
|
90
|
-
# Returns the Pathname of this file.
|
91
|
-
def this_file_path
|
92
|
-
Pathname.new( __FILE__ ).expand_path
|
93
|
-
end
|
94
|
-
|
95
|
-
# Internal: The root directory of this project
|
96
|
-
#
|
97
|
-
# This is defined as being the directory that is in the path of this project
|
98
|
-
# that has the first Rakefile
|
99
|
-
#
|
100
|
-
# Returns the Pathname of the directory
|
101
|
-
def project_root
|
102
|
-
this_file_path.ascend do |p|
|
103
|
-
rakefile = p.join( 'Rakefile' )
|
104
|
-
return p if rakefile.exist?
|
105
|
-
end
|
106
|
-
end
|
107
|
-
|
108
|
-
# Internal: Returns the contents of the Manifest.txt file as an array
|
109
|
-
#
|
110
|
-
# Returns an Array of strings
|
111
|
-
def manifest
|
112
|
-
manifest_file = project_path( "Manifest.txt" )
|
113
|
-
abort "You need a Manifest.txt" unless manifest_file.readable?
|
114
|
-
manifest_file.readlines.map { |l| l.strip }
|
115
|
-
end
|
116
|
-
|
117
|
-
# Internal: Return the files that define the extensions
|
118
|
-
#
|
119
|
-
# Returns an Array
|
120
|
-
def extension_conf_files
|
121
|
-
manifest.grep( /extconf.rb\Z/ )
|
122
|
-
end
|
123
|
-
|
124
|
-
# Internal: Returns the gemspec associated with the current ruby platform
|
125
|
-
def platform_gemspec
|
126
|
-
gemspecs.fetch(platform) { This.ruby_gemspec }
|
127
|
-
end
|
128
|
-
|
129
|
-
def core_gemspec
|
130
|
-
Gem::Specification.new do |spec|
|
131
|
-
spec.name = name
|
132
|
-
spec.version = version
|
133
|
-
spec.author = author
|
134
|
-
spec.email = email
|
135
|
-
spec.homepage = homepage
|
136
|
-
|
137
|
-
spec.summary = summary
|
138
|
-
spec.description = description
|
139
|
-
spec.license = license
|
140
|
-
|
141
|
-
spec.files = manifest
|
142
|
-
spec.executables = spec.files.grep(/^bin/) { |f| File.basename(f) }
|
143
|
-
spec.test_files = spec.files.grep(/^(spec|test)/)
|
144
|
-
|
145
|
-
spec.extra_rdoc_files += spec.files.grep(/(txt|rdoc|md)$/)
|
146
|
-
spec.rdoc_options = [ "--main" , 'README.md',
|
147
|
-
"--markup", "tomdoc" ]
|
148
|
-
|
149
|
-
spec.required_ruby_version = '>= 2.2.2'
|
150
|
-
end
|
151
|
-
end
|
152
|
-
|
153
|
-
# Internal: Return the gemspec for the ruby platform
|
154
|
-
def ruby_gemspec( core = core_gemspec, &block )
|
155
|
-
yielding_gemspec( 'ruby', core, &block )
|
156
|
-
end
|
157
|
-
|
158
|
-
# Internal: Return the gemspec for the jruby platform
|
159
|
-
def java_gemspec( core = core_gemspec, &block )
|
160
|
-
yielding_gemspec( 'java', core, &block )
|
161
|
-
end
|
162
|
-
|
163
|
-
# Internal: give an initial spec and a key, create a new gemspec based off of
|
164
|
-
# it.
|
165
|
-
#
|
166
|
-
# This will force the new gemspecs 'platform' to be that of the key, since the
|
167
|
-
# only reason you would have multiple gemspecs at this point is to deal with
|
168
|
-
# different platforms.
|
169
|
-
def yielding_gemspec( key, core )
|
170
|
-
spec = gemspecs[key] ||= core.dup
|
171
|
-
spec.platform = key
|
172
|
-
yield spec if block_given?
|
173
|
-
return spec
|
174
|
-
end
|
175
|
-
|
176
|
-
# Internal: Return the platform of ThisProject at the current moment in time.
|
177
|
-
def platform
|
178
|
-
(RUBY_PLATFORM == "java") ? 'java' : Gem::Platform::RUBY
|
179
|
-
end
|
180
|
-
|
181
|
-
# Internal: Return the DESCRIPTION section of the README.md file
|
182
|
-
def description_section
|
183
|
-
section_of( 'README.md', 'DESCRIPTION')
|
184
|
-
end
|
185
|
-
|
186
|
-
# Internal: Return the summary text from the README
|
187
|
-
def summary
|
188
|
-
description_section.first
|
189
|
-
end
|
190
|
-
|
191
|
-
# Internal: Return the full description text from the README
|
192
|
-
def description
|
193
|
-
description_section.join(" ").tr("\n", ' ').gsub(/[{}]/,'').gsub(/\[[^\]]+\]/,'') # strip rdoc
|
194
|
-
end
|
195
|
-
|
196
|
-
def license
|
197
|
-
license_file = project_path("LICENSE.txt")
|
198
|
-
line = license_file.readlines.first
|
199
|
-
line.split(/\s+/).first
|
200
|
-
end
|
201
|
-
|
202
|
-
# Internal: The path to the gemspec file
|
203
|
-
def gemspec_file
|
204
|
-
project_path( "#{ name }.gemspec" )
|
205
|
-
end
|
206
|
-
end
|
207
|
-
|
208
|
-
This = ThisProject.new
|
data/test/device_dataset.rb
DELETED
@@ -1,117 +0,0 @@
|
|
1
|
-
class DeviceDataset
|
2
|
-
|
3
|
-
include TestHelper
|
4
|
-
|
5
|
-
attr_reader :count
|
6
|
-
attr_reader :compare_fields
|
7
|
-
attr_reader :fields
|
8
|
-
|
9
|
-
attr_reader :filename_jsonl
|
10
|
-
attr_reader :filename_sorted_jsonl
|
11
|
-
attr_reader :filename_csv
|
12
|
-
attr_reader :filename_sorted_csv
|
13
|
-
|
14
|
-
def initialize(count:, compare_fields: [ "manufacturer", "model_name", "slug" ])
|
15
|
-
@count = count
|
16
|
-
@compare_fields = compare_fields
|
17
|
-
@fields = %w[
|
18
|
-
build_number
|
19
|
-
manufacturer
|
20
|
-
model_name
|
21
|
-
platform
|
22
|
-
serial
|
23
|
-
slug
|
24
|
-
]
|
25
|
-
@filename_sorted_jsonl = nil
|
26
|
-
@filename_jsonl = nil
|
27
|
-
@filename_sorted_csv = nil
|
28
|
-
@filename_csv = nil
|
29
|
-
@slug = generate_slug
|
30
|
-
end
|
31
|
-
|
32
|
-
def persist_records_as_jsonl
|
33
|
-
@filename_jsonl = scratch_file(prefix: "unsorted_", slug: @slug)
|
34
|
-
@filename_jsonl.open("w+") do |f|
|
35
|
-
f.write(records_as_jsonl)
|
36
|
-
end
|
37
|
-
end
|
38
|
-
|
39
|
-
def persist_sorted_records_as_jsonl
|
40
|
-
@filename_sorted_jsonl = scratch_file(prefix: "sorted_", slug: @slug)
|
41
|
-
@filename_sorted_jsonl.open("w+") do |f|
|
42
|
-
f.write(sorted_records_as_jsonl)
|
43
|
-
end
|
44
|
-
end
|
45
|
-
|
46
|
-
def cleanup_files
|
47
|
-
[ @filename_sorted_jsonl, @filename_jsonl, @filename_sorted_csv, @filename_csv ].each do |p|
|
48
|
-
next if p.nil?
|
49
|
-
p.unlink if p.exist?
|
50
|
-
end
|
51
|
-
end
|
52
|
-
|
53
|
-
def records
|
54
|
-
@records ||= Array.new.tap do |a|
|
55
|
-
count.times do
|
56
|
-
a << Hash.new.tap do |h|
|
57
|
-
fields.each do |f|
|
58
|
-
value = (f == 'slug') ? generate_slug : ::Faker::Device.send(f)
|
59
|
-
h[f] = value
|
60
|
-
end
|
61
|
-
end
|
62
|
-
end
|
63
|
-
end
|
64
|
-
end
|
65
|
-
|
66
|
-
def sorted_records
|
67
|
-
@sorted_records ||= records.sort_by do |r|
|
68
|
-
compare_fields.map { |field| r[field] }
|
69
|
-
end
|
70
|
-
end
|
71
|
-
|
72
|
-
def records_as_jsonl
|
73
|
-
@jsonl_records ||= as_jsonl(list: records)
|
74
|
-
end
|
75
|
-
|
76
|
-
def records_as_csv
|
77
|
-
@csv_records ||= as_csv(list: records)
|
78
|
-
end
|
79
|
-
|
80
|
-
def records_as_csv_rows
|
81
|
-
@csv_rows ||= as_csv_rows(records_as_csv)
|
82
|
-
end
|
83
|
-
|
84
|
-
def sorted_records_as_jsonl
|
85
|
-
@jsonl_sorted_records ||= as_jsonl(list: sorted_records)
|
86
|
-
end
|
87
|
-
|
88
|
-
def sorted_records_as_csv
|
89
|
-
@csv_sorted_records ||= as_csv(list: sorted_records)
|
90
|
-
end
|
91
|
-
|
92
|
-
def sorted_records_as_csv_rows
|
93
|
-
@csv_sorted_rows ||= as_csv_rows(sorted_records_as_csv)
|
94
|
-
end
|
95
|
-
|
96
|
-
private
|
97
|
-
|
98
|
-
def as_jsonl(list:)
|
99
|
-
list.map { |r| Oj.dump(r) }.join("\n") + "\n"
|
100
|
-
end
|
101
|
-
|
102
|
-
def as_csv(list:, headers: fields)
|
103
|
-
CSV.generate('', headers: headers , write_headers: true) do |csv|
|
104
|
-
list.each do |r|
|
105
|
-
csv << fields.map { |f| r[f] }
|
106
|
-
end
|
107
|
-
end
|
108
|
-
end
|
109
|
-
|
110
|
-
def as_csv_rows(text)
|
111
|
-
Array.new.tap do |a|
|
112
|
-
CSV.new(text, converters: :numeric, headers: :first_row, return_headers: false).each do |row|
|
113
|
-
a << row
|
114
|
-
end
|
115
|
-
end
|
116
|
-
end
|
117
|
-
end
|
data/test/input/test_file.rb
DELETED
@@ -1,73 +0,0 @@
|
|
1
|
-
require_relative '../test_helper'
|
2
|
-
|
3
|
-
module TestInput
|
4
|
-
class TestFile < ::Minitest::Test
|
5
|
-
def test_does_not_handle_stdin_text
|
6
|
-
::FlatKit::Input::IO::STDINS.each do |e|
|
7
|
-
refute(::FlatKit::Input::File.handles?(e), "#{e} is not stdin text")
|
8
|
-
end
|
9
|
-
end
|
10
|
-
|
11
|
-
def test_handles_existing_file
|
12
|
-
test_path = "tmp/test_handles_existing_file.txt"
|
13
|
-
begin
|
14
|
-
IO.write(test_path,"test handles existing file")
|
15
|
-
assert(::FlatKit::Input::File.handles?(test_path))
|
16
|
-
ensure
|
17
|
-
File.unlink(test_path) if File.exist?(test_path)
|
18
|
-
end
|
19
|
-
end
|
20
|
-
|
21
|
-
def test_only_handles_string
|
22
|
-
refute(::FlatKit::Input::File.handles?(Object.new))
|
23
|
-
end
|
24
|
-
|
25
|
-
def test_raises_error_if_not_readable
|
26
|
-
assert_raises(FlatKit::Error) { ::FlatKit::Input::File.new("tmp/does-not-exist") }
|
27
|
-
end
|
28
|
-
|
29
|
-
def test_init_from_path
|
30
|
-
test_path = "tmp/test_init_from_path.txt"
|
31
|
-
begin
|
32
|
-
IO.write(test_path,"nothing to see here")
|
33
|
-
io = ::FlatKit::Input::File.new(test_path)
|
34
|
-
assert_equal(test_path, io.name)
|
35
|
-
assert_instance_of(::File, io.io)
|
36
|
-
ensure
|
37
|
-
File.unlink(test_path) if File.exist?(test_path)
|
38
|
-
end
|
39
|
-
end
|
40
|
-
|
41
|
-
def test_reads_from_file
|
42
|
-
test_path = "tmp/test_reads_from_file.txt"
|
43
|
-
begin
|
44
|
-
text = "test_reads_from_file"
|
45
|
-
IO.write(test_path,text)
|
46
|
-
|
47
|
-
input = ::FlatKit::Input::File.new(test_path)
|
48
|
-
content = input.io.read
|
49
|
-
assert_equal(text, content)
|
50
|
-
|
51
|
-
input.close
|
52
|
-
ensure
|
53
|
-
File.unlink(test_path) if File.exist?(test_path)
|
54
|
-
end
|
55
|
-
end
|
56
|
-
|
57
|
-
def test_reads_from_gzfile
|
58
|
-
test_path = "tmp/test_reads_from_gzfile.txt.gz"
|
59
|
-
begin
|
60
|
-
text = "this is something to read"
|
61
|
-
system("echo '#{text}' | gzip > #{test_path}")
|
62
|
-
|
63
|
-
input = ::FlatKit::Input::File.new(test_path)
|
64
|
-
content = input.io.read
|
65
|
-
assert_equal(text + "\n", content)
|
66
|
-
|
67
|
-
input.close
|
68
|
-
ensure
|
69
|
-
File.unlink(test_path) if File.exist?(test_path)
|
70
|
-
end
|
71
|
-
end
|
72
|
-
end
|
73
|
-
end
|
data/test/input/test_io.rb
DELETED
@@ -1,93 +0,0 @@
|
|
1
|
-
require_relative '../test_helper'
|
2
|
-
|
3
|
-
module TestInput
|
4
|
-
class NullIO < ::IO
|
5
|
-
def initialize()
|
6
|
-
end
|
7
|
-
end
|
8
|
-
|
9
|
-
class TestIO < ::Minitest::Test
|
10
|
-
|
11
|
-
def test_handles_stdin_text
|
12
|
-
::FlatKit::Input::IO::STDINS.each do |e|
|
13
|
-
assert(::FlatKit::Input::IO.handles?(e), "#{e} is not stdin text")
|
14
|
-
end
|
15
|
-
end
|
16
|
-
|
17
|
-
def test_handles_stdin_io
|
18
|
-
x = $stdin
|
19
|
-
assert(::FlatKit::Input::IO.handles?(x), "is not stdin")
|
20
|
-
end
|
21
|
-
|
22
|
-
def test_handles_stringio
|
23
|
-
assert(::FlatKit::Input::IO.handles?(StringIO.new))
|
24
|
-
end
|
25
|
-
|
26
|
-
def test_does_not_handle_other
|
27
|
-
x = Object.new
|
28
|
-
refute(::FlatKit::Input::IO.handles?(x))
|
29
|
-
end
|
30
|
-
|
31
|
-
def test_init_from_dash
|
32
|
-
io = ::FlatKit::Input::IO.new("-")
|
33
|
-
assert_equal("<STDIN>", io.name)
|
34
|
-
assert_equal(::STDIN, io.io)
|
35
|
-
end
|
36
|
-
|
37
|
-
def test_init_from_file_object
|
38
|
-
test_path = "tmp/test_init_from_file_object.txt"
|
39
|
-
begin
|
40
|
-
File.open(test_path, "w+") do |f|
|
41
|
-
io = ::FlatKit::Input::IO.new(f)
|
42
|
-
assert_equal(test_path, io.name)
|
43
|
-
assert_instance_of(::File, io.io)
|
44
|
-
end
|
45
|
-
ensure
|
46
|
-
File.unlink(test_path) if File.exist?(test_path)
|
47
|
-
end
|
48
|
-
end
|
49
|
-
|
50
|
-
def test_init_from_string_io_object
|
51
|
-
sio = StringIO.new
|
52
|
-
io = ::FlatKit::Input::IO.new(sio)
|
53
|
-
assert_match(/StringIO/, io.name)
|
54
|
-
assert_instance_of(::StringIO, io.io)
|
55
|
-
end
|
56
|
-
|
57
|
-
def test_init_from_io_object
|
58
|
-
null_io = NullIO.new
|
59
|
-
io = ::FlatKit::Input::IO.new(null_io)
|
60
|
-
assert_match(/NullIO/, io.name)
|
61
|
-
assert_instance_of(::TestInput::NullIO, io.io)
|
62
|
-
end
|
63
|
-
|
64
|
-
def test_init_from_stdin
|
65
|
-
io = ::FlatKit::Input::IO.new($stdin)
|
66
|
-
assert_equal("<STDIN>", io.name)
|
67
|
-
assert_equal(::STDIN, io.io)
|
68
|
-
end
|
69
|
-
|
70
|
-
def test_init_from_invalid
|
71
|
-
assert_raises(::FlatKit::Error) { ::FlatKit::Input::IO.new(Object.new) }
|
72
|
-
end
|
73
|
-
|
74
|
-
def test_reads_from_io
|
75
|
-
test_path = "tmp/test_reads_from_io.txt"
|
76
|
-
begin
|
77
|
-
line = "This is a line to read"
|
78
|
-
File.open(test_path, "w+") do |f|
|
79
|
-
f.write(line)
|
80
|
-
f.rewind
|
81
|
-
|
82
|
-
io = ::FlatKit::Input::IO.new(f)
|
83
|
-
content = io.io.read
|
84
|
-
io.close
|
85
|
-
assert_equal(content, line)
|
86
|
-
end
|
87
|
-
ensure
|
88
|
-
File.unlink(test_path) if File.exist?(test_path)
|
89
|
-
end
|
90
|
-
end
|
91
|
-
|
92
|
-
end
|
93
|
-
end
|
data/test/jsonl/test_format.rb
DELETED
@@ -1,22 +0,0 @@
|
|
1
|
-
require_relative '../test_helper'
|
2
|
-
|
3
|
-
module TestJsonl
|
4
|
-
class TestFormat < ::Minitest::Test
|
5
|
-
|
6
|
-
def test_handles_json
|
7
|
-
assert(::FlatKit::Jsonl::Format.handles?("data.json.gz"))
|
8
|
-
end
|
9
|
-
|
10
|
-
def test_handles_jsonl
|
11
|
-
assert(::FlatKit::Jsonl::Format.handles?("data.jsonl"))
|
12
|
-
end
|
13
|
-
|
14
|
-
def test_handles_ndjson
|
15
|
-
assert(::FlatKit::Jsonl::Format.handles?("log.ndjson"))
|
16
|
-
end
|
17
|
-
|
18
|
-
def test_does_not_handle_csv
|
19
|
-
refute(::FlatKit::Jsonl::Format.handles?("data.csv"))
|
20
|
-
end
|
21
|
-
end
|
22
|
-
end
|
data/test/jsonl/test_reader.rb
DELETED
@@ -1,49 +0,0 @@
|
|
1
|
-
require_relative '../test_helper'
|
2
|
-
|
3
|
-
module TestJsonl
|
4
|
-
class TestReader < ::Minitest::Test
|
5
|
-
def setup
|
6
|
-
@count = 20
|
7
|
-
@dataset = DeviceDataset.new(count: @count)
|
8
|
-
@compare_fields = @dataset.compare_fields
|
9
|
-
@test_path = "tmp/test_reads_from_io.jsonl"
|
10
|
-
|
11
|
-
File.open(@test_path, "wb") do |f|
|
12
|
-
f.write(@dataset.records_as_jsonl)
|
13
|
-
end
|
14
|
-
end
|
15
|
-
|
16
|
-
def teardown
|
17
|
-
File.unlink(@test_path) if File.exist?(@test_path)
|
18
|
-
end
|
19
|
-
|
20
|
-
def test_raises_error_on_invalid_source
|
21
|
-
assert_raises(::FlatKit::Error) {
|
22
|
-
::FlatKit::Jsonl::Reader.new(source: Object.new, compare_fields: nil)
|
23
|
-
}
|
24
|
-
end
|
25
|
-
|
26
|
-
def test_reads_from_pathname
|
27
|
-
reader = ::FlatKit::Jsonl::Reader.new(source: @test_path, compare_fields: @compare_fields)
|
28
|
-
all = reader.to_a
|
29
|
-
assert_equal(@count, reader.count)
|
30
|
-
assert_equal(@count, all.size)
|
31
|
-
end
|
32
|
-
|
33
|
-
def test_reads_from_io
|
34
|
-
File.open(@test_path) do |f|
|
35
|
-
reader = ::FlatKit::Jsonl::Reader.new(source: f, compare_fields: @compare_fields)
|
36
|
-
all = reader.to_a
|
37
|
-
assert_equal(@count, reader.count)
|
38
|
-
assert_equal(@count, all.size)
|
39
|
-
end
|
40
|
-
end
|
41
|
-
|
42
|
-
def test_raises_error_on_io_error
|
43
|
-
s = StringIO.new
|
44
|
-
s.close_read
|
45
|
-
reader = ::FlatKit::Jsonl::Reader.new(source: s, compare_fields: @compare_fields)
|
46
|
-
assert_raises(::FlatKit::Error) { reader.to_a }
|
47
|
-
end
|
48
|
-
end
|
49
|
-
end
|
data/test/jsonl/test_record.rb
DELETED
@@ -1,61 +0,0 @@
|
|
1
|
-
require_relative '../test_helper'
|
2
|
-
require 'faker'
|
3
|
-
require 'byebug'
|
4
|
-
|
5
|
-
module TestJsonl
|
6
|
-
class TestRecord < ::Minitest::Test
|
7
|
-
def setup
|
8
|
-
@one_row_dataset = DeviceDataset.new(count: 1)
|
9
|
-
@src_record = @one_row_dataset.records.first
|
10
|
-
@src_json = JSON.generate(@src_record)
|
11
|
-
@compare_fields = @one_row_dataset.compare_fields
|
12
|
-
end
|
13
|
-
|
14
|
-
def test_initializes_from_data
|
15
|
-
record = FlatKit::Jsonl::Record.new(data: @src_json, compare_fields: @compare_fields)
|
16
|
-
@compare_fields.each do |k|
|
17
|
-
assert_equal(@src_record[k], record[k])
|
18
|
-
end
|
19
|
-
end
|
20
|
-
|
21
|
-
def test_ignores_non_compare_fields_values
|
22
|
-
record = FlatKit::Jsonl::Record.new(data: @src_json, compare_fields: @compare_fields)
|
23
|
-
|
24
|
-
refute(record["version"])
|
25
|
-
end
|
26
|
-
|
27
|
-
def test_is_sortable
|
28
|
-
dataset = DeviceDataset.new(count: 20)
|
29
|
-
fk_records = Array.new.tap do |a|
|
30
|
-
dataset.records.each do |r|
|
31
|
-
data = JSON.generate(r)
|
32
|
-
record = FlatKit::Jsonl::Record.new(data: data, compare_fields: @compare_fields)
|
33
|
-
a << record
|
34
|
-
end
|
35
|
-
end
|
36
|
-
|
37
|
-
sorted = fk_records.sort
|
38
|
-
|
39
|
-
sio = StringIO.new
|
40
|
-
sorted.each do |r|
|
41
|
-
sio.puts(r.to_s)
|
42
|
-
end
|
43
|
-
|
44
|
-
sorted_string = sio.string
|
45
|
-
assert_equal(dataset.sorted_records_as_jsonl, sorted_string)
|
46
|
-
end
|
47
|
-
|
48
|
-
def test_converts_to_hash
|
49
|
-
record = FlatKit::Jsonl::Record.new(data: @src_json, compare_fields: @compare_fields)
|
50
|
-
h = record.to_hash
|
51
|
-
|
52
|
-
assert_equal(@src_record, h)
|
53
|
-
end
|
54
|
-
|
55
|
-
def test_converts_from_record
|
56
|
-
rec1 = FlatKit::Jsonl::Record.new(data: @src_json, compare_fields: @compare_fields)
|
57
|
-
rec2 = FlatKit::Jsonl::Record.from_record(rec1)
|
58
|
-
assert_equal(rec1, rec2)
|
59
|
-
end
|
60
|
-
end
|
61
|
-
end
|