rika 2.1.0-java → 2.2.0-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +10 -7
- data/.rspec +1 -1
- data/README.md +58 -11
- data/RELEASE_NOTES.md +12 -0
- data/{bin → exe}/rika +1 -1
- data/lib/rika/cli/args_parser.rb +124 -26
- data/lib/rika/cli/rika_command.rb +184 -44
- data/lib/rika/parser.rb +33 -15
- data/lib/rika/version.rb +1 -1
- data/rika.gemspec +2 -1
- data/spec/integration/cli_end_to_end_spec.rb +212 -0
- data/spec/integration/document_processing_spec.rb +193 -0
- data/spec/integration/web_url_processing_spec.rb +252 -0
- data/spec/rika/cli/args_parser/boolean_options_spec.rb +136 -0
- data/spec/rika/cli/args_parser/environment_options_spec.rb +115 -0
- data/spec/rika/cli/args_parser/format_options_spec.rb +143 -0
- data/spec/rika/cli/{args_parser_spec.rb → args_parser/main_spec.rb} +63 -14
- data/spec/rika/cli/args_parser/url_filespec_spec.rb +134 -0
- data/spec/rika/cli/rika_command_spec.rb +81 -13
- metadata +12 -5
@@ -0,0 +1,193 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'spec_helper'
|
4
|
+
require 'rika'
|
5
|
+
require 'tempfile'
|
6
|
+
require 'fileutils'
|
7
|
+
|
8
|
+
describe 'Document Processing Pipeline', type: :integration do
|
9
|
+
let(:txt_file) { fixture_path('document.txt') }
|
10
|
+
let(:pdf_file) { fixture_path('document.pdf') }
|
11
|
+
let(:docx_file) { fixture_path('document.docx') }
|
12
|
+
let(:image_file) { fixture_path('image.jpg') }
|
13
|
+
let(:large_file) { fixture_path('large.txt') }
|
14
|
+
let(:quote_first_line) { 'Stopping by Woods on a Snowy Evening' }
|
15
|
+
|
16
|
+
context 'processing different file formats through complete pipeline' do
|
17
|
+
it 'correctly processes a text file from start to finish' do
|
18
|
+
result = Rika.parse(txt_file)
|
19
|
+
|
20
|
+
aggregate_failures do
|
21
|
+
expect(result.content.strip.split("\n").first).to eq(quote_first_line)
|
22
|
+
expect(result.metadata).to include('Content-Type')
|
23
|
+
expect(result.language).to eq('en')
|
24
|
+
expect(result.input_type).to eq(:file)
|
25
|
+
expect(result.file?).to be true
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
it 'correctly processes a PDF file from start to finish' do
|
30
|
+
result = Rika.parse(pdf_file)
|
31
|
+
|
32
|
+
aggregate_failures do
|
33
|
+
# PDFs often have a newline at the beginning
|
34
|
+
expect(result.content.strip.split("\n").first.strip).to eq(quote_first_line)
|
35
|
+
expect(result.metadata).to include('Content-Type' => 'application/pdf')
|
36
|
+
expect(result.metadata).to include('dc:creator' => 'Robert Frost')
|
37
|
+
expect(result.language).to eq('en')
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
it 'correctly processes a DOCX file from start to finish' do
|
42
|
+
result = Rika.parse(docx_file)
|
43
|
+
|
44
|
+
aggregate_failures do
|
45
|
+
expect(result.content.strip.split("\n").first).to eq(quote_first_line)
|
46
|
+
expect(result.metadata).to include('Content-Type')
|
47
|
+
expect(result.language).to eq('en')
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
it 'correctly processes an image file from start to finish' do
|
52
|
+
result = Rika.parse(image_file)
|
53
|
+
|
54
|
+
# Images may not have textual content
|
55
|
+
expect(result.metadata).to include('Content-Type' => 'image/jpeg')
|
56
|
+
end
|
57
|
+
|
58
|
+
it 'correctly processes a large file from start to finish' do
|
59
|
+
result = Rika.parse(large_file)
|
60
|
+
|
61
|
+
aggregate_failures do
|
62
|
+
# Check that content was extracted
|
63
|
+
expect(result.content).not_to be_empty
|
64
|
+
expect(result.content.length).to be > 1000 # Should be a large amount of content
|
65
|
+
# The content type may vary based on detected encoding
|
66
|
+
expect(result.metadata).to include('Content-Type')
|
67
|
+
end
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
context 'processing files with non-ASCII characters' do
|
72
|
+
# Using fixtures with non-ASCII content
|
73
|
+
let(:non_ascii_file) { fixture_path('ru.txt') }
|
74
|
+
let(:expected_language) { 'ru' }
|
75
|
+
|
76
|
+
it 'correctly processes and detects language with non-ASCII characters' do
|
77
|
+
result = Rika.parse(non_ascii_file)
|
78
|
+
|
79
|
+
aggregate_failures do
|
80
|
+
expect(result.language).to eq(expected_language)
|
81
|
+
expect(result.content).not_to be_empty
|
82
|
+
expect(result.metadata).to include('Content-Type')
|
83
|
+
end
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
87
|
+
context 'processing different parts of the same file' do
|
88
|
+
it 'correctly extracts partial content based on max_content_length' do
|
89
|
+
# Test with different max_content_length values
|
90
|
+
full_result = Rika.parse(txt_file)
|
91
|
+
partial_result_10 = Rika.parse(txt_file, max_content_length: 10)
|
92
|
+
partial_result_50 = Rika.parse(txt_file, max_content_length: 50)
|
93
|
+
|
94
|
+
aggregate_failures do
|
95
|
+
# Verify correct truncation
|
96
|
+
expect(partial_result_10.content.length).to be <= 10
|
97
|
+
expect(partial_result_50.content.length).to be <= 50
|
98
|
+
expect(full_result.content.length).to be > 50
|
99
|
+
|
100
|
+
# Content should be the beginning part of the full content
|
101
|
+
expect(full_result.content).to start_with(partial_result_10.content)
|
102
|
+
expect(full_result.content).to start_with(partial_result_50.content)
|
103
|
+
end
|
104
|
+
end
|
105
|
+
end
|
106
|
+
|
107
|
+
context 'metadata consistency across formats' do
|
108
|
+
it 'provides consistent metadata fields across different file formats' do
|
109
|
+
txt_result = Rika.parse(txt_file)
|
110
|
+
pdf_result = Rika.parse(pdf_file)
|
111
|
+
docx_result = Rika.parse(docx_file)
|
112
|
+
|
113
|
+
# Test each file individually for better error reporting
|
114
|
+
{
|
115
|
+
txt_file => txt_result,
|
116
|
+
pdf_file => pdf_result,
|
117
|
+
docx_file => docx_result
|
118
|
+
}.each do |file, result|
|
119
|
+
aggregate_failures "for #{File.basename(file)}" do
|
120
|
+
expect(result.metadata).to include('Content-Type')
|
121
|
+
expect(result.metadata).to include('rika:language')
|
122
|
+
expect(result.metadata).to include('rika:data-source')
|
123
|
+
end
|
124
|
+
end
|
125
|
+
end
|
126
|
+
end
|
127
|
+
|
128
|
+
context 'memory management with large files' do
|
129
|
+
it 'processes a large file with limited content length without memory issues' do
|
130
|
+
# Process with limited content length (should be efficient with memory)
|
131
|
+
result = Rika.parse(large_file, max_content_length: 100)
|
132
|
+
|
133
|
+
aggregate_failures do
|
134
|
+
# Verify content is limited correctly
|
135
|
+
expect(result.content.length).to be <= 100
|
136
|
+
expect(result.metadata).to include('Content-Type')
|
137
|
+
end
|
138
|
+
end
|
139
|
+
|
140
|
+
it 'processes a large file multiple times without memory leaks' do
|
141
|
+
# Attempt to process the same large file multiple times
|
142
|
+
# This is a basic test to ensure no obvious memory leaks
|
143
|
+
5.times do |i|
|
144
|
+
# Process with different max_content_length each time
|
145
|
+
content_limit = 100 + (i * 100)
|
146
|
+
result = Rika.parse(large_file, max_content_length: content_limit)
|
147
|
+
|
148
|
+
# Verify each result
|
149
|
+
aggregate_failures "for iteration #{i+1}" do
|
150
|
+
expect(result.content.length).to be <= content_limit
|
151
|
+
expect(result.metadata).to include('Content-Type')
|
152
|
+
end
|
153
|
+
end
|
154
|
+
|
155
|
+
# If we reach here without out-of-memory errors, the test passes
|
156
|
+
end
|
157
|
+
end
|
158
|
+
|
159
|
+
context 'sequential processing of multiple files' do
|
160
|
+
it 'correctly processes multiple files in sequence' do
|
161
|
+
files = [
|
162
|
+
{path: txt_file, expected_type: 'text/plain'},
|
163
|
+
{path: pdf_file, expected_type: 'application/pdf'},
|
164
|
+
{path: docx_file, expected_type: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'},
|
165
|
+
{path: large_file, expected_type: 'text/plain'}
|
166
|
+
]
|
167
|
+
|
168
|
+
# Process each file and store the results
|
169
|
+
results = {}
|
170
|
+
files.each do |file_info|
|
171
|
+
file_path = file_info[:path]
|
172
|
+
# Use basename for clearer test output
|
173
|
+
file_name = File.basename(file_path)
|
174
|
+
results[file_name] = Rika.parse(file_path, max_content_length: 1000)
|
175
|
+
end
|
176
|
+
|
177
|
+
# Verify each result individually for better error reporting
|
178
|
+
files.each do |file_info|
|
179
|
+
file_path = file_info[:path]
|
180
|
+
file_name = File.basename(file_path)
|
181
|
+
expected_type = file_info[:expected_type]
|
182
|
+
result = results[file_name]
|
183
|
+
|
184
|
+
aggregate_failures "for #{file_name}" do
|
185
|
+
expect(result.metadata).to include('Content-Type')
|
186
|
+
expect(result.content).not_to be_empty
|
187
|
+
# The exact content type might include encoding information
|
188
|
+
expect(result.metadata['Content-Type']).to include(expected_type.split(';').first)
|
189
|
+
end
|
190
|
+
end
|
191
|
+
end
|
192
|
+
end
|
193
|
+
end
|
@@ -0,0 +1,252 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'spec_helper'
|
4
|
+
require 'rika'
|
5
|
+
require 'webrick'
|
6
|
+
require 'net/http'
|
7
|
+
require 'stringio'
|
8
|
+
|
9
|
+
describe 'Web URL Processing', type: :integration do
|
10
|
+
# Set up a simple HTTP server for testing
|
11
|
+
let(:port) { 50515 }
|
12
|
+
let(:fixtures_dir) { File.expand_path(File.join(File.dirname(__FILE__), '../fixtures')) }
|
13
|
+
let(:url_base) { "http://#{Socket.gethostname}:#{port}" }
|
14
|
+
let(:txt_url) { "#{url_base}/document.txt" }
|
15
|
+
let(:pdf_url) { "#{url_base}/document.pdf" }
|
16
|
+
let(:docx_url) { "#{url_base}/document.docx" }
|
17
|
+
let(:large_url) { "#{url_base}/large.txt" } # This would be a large text file in fixtures
|
18
|
+
let(:redirecting_url) { "#{url_base}/redirect" }
|
19
|
+
let(:not_found_url) { "#{url_base}/not_found.txt" }
|
20
|
+
let(:server_error_url) { "#{url_base}/server_error" }
|
21
|
+
|
22
|
+
# Runs the given block while a local WEBrick server serves the fixtures directory.
#
# In addition to the static fixtures, three synthetic endpoints are mounted:
#   /redirect       -> 302 pointing at document.txt
#   /server_error   -> 500 with a short body
#   /not_found.txt  -> 404 with a short body
#
# The server is constructed on the calling thread so that a construction
# failure (for example, the port already being in use) raises here instead of
# leaving the caller waiting for a server that will never exist. The block is
# yielded to only after WEBrick has invoked its StartCallback, and the server
# is always shut down afterwards.
#
# @yield runs with the server accepting requests
# @raise [RuntimeError] if the server thread ends before reporting that it started
def with_server
  ready = Queue.new

  server = WEBrick::HTTPServer.new(
    Port: port,
    DocumentRoot: fixtures_dir,
    AccessLog: [],
    Logger: WEBrick::Log.new(File::NULL),
    # Invoked by WEBrick from inside #start once the server is marked as running.
    StartCallback: proc { ready << :started }
  )

  # Add a redirect handler
  server.mount_proc('/redirect') do |_req, res|
    res.status = 302
    res['Location'] = "#{url_base}/document.txt"
  end

  # Add a server error handler
  server.mount_proc('/server_error') do |_req, res|
    res.status = 500
    res.body = 'Internal Server Error'
  end

  # Add a handler for 404 errors
  server.mount_proc('/not_found.txt') do |_req, res|
    res.status = 404
    res.body = 'Not Found'
  end

  server_thread = Thread.new do
    begin
      server.start
    ensure
      # Unblocks the waiting caller if #start ends before the StartCallback fired.
      ready << :stopped
    end
  end

  # Wait for the server to become ready (or to fail while starting).
  raise 'WEBrick server failed to start' unless ready.pop == :started

  begin
    yield
  ensure
    server.shutdown
    server_thread.join(5) # Give it 5 seconds to shut down
  end
end
|
64
|
+
|
65
|
+
context 'with valid URLs' do
|
66
|
+
it 'successfully retrieves and processes text content from a URL' do
|
67
|
+
with_server do
|
68
|
+
result = Rika.parse(txt_url)
|
69
|
+
|
70
|
+
aggregate_failures do
|
71
|
+
expect(result.content).to include('Stopping by Woods on a Snowy Evening')
|
72
|
+
expect(result.metadata).to include('Content-Type')
|
73
|
+
expect(result.input_type).to eq(:http)
|
74
|
+
expect(result.http?).to be true
|
75
|
+
expect(result.file?).to be false
|
76
|
+
end
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|
80
|
+
it 'successfully retrieves and processes PDF content from a URL' do
|
81
|
+
with_server do
|
82
|
+
result = Rika.parse(pdf_url)
|
83
|
+
|
84
|
+
aggregate_failures do
|
85
|
+
expect(result.content).to include('Stopping by Woods on a Snowy Evening')
|
86
|
+
expect(result.metadata).to include('Content-Type')
|
87
|
+
expect(result.metadata).to include('dc:creator' => 'Robert Frost')
|
88
|
+
end
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
92
|
+
it 'successfully retrieves and processes DOCX content from a URL' do
|
93
|
+
with_server do
|
94
|
+
result = Rika.parse(docx_url)
|
95
|
+
|
96
|
+
aggregate_failures do
|
97
|
+
expect(result.content).to include('Stopping by Woods on a Snowy Evening')
|
98
|
+
expect(result.metadata['Content-Type']).to include('application/vnd.openxmlformats-officedocument.wordprocessingml.document')
|
99
|
+
end
|
100
|
+
end
|
101
|
+
end
|
102
|
+
|
103
|
+
it 'successfully processes large files from a URL' do
|
104
|
+
with_server do
|
105
|
+
result = Rika.parse(large_url)
|
106
|
+
|
107
|
+
aggregate_failures do
|
108
|
+
# Check that content was extracted
|
109
|
+
expect(result.content).not_to be_empty
|
110
|
+
expect(result.content.length).to be > 1000 # Should be a large amount of content
|
111
|
+
expect(result.metadata).to include('Content-Type')
|
112
|
+
end
|
113
|
+
end
|
114
|
+
end
|
115
|
+
end
|
116
|
+
|
117
|
+
context 'with content size limitations' do
|
118
|
+
it 'respects max_content_length when retrieving from URL' do
|
119
|
+
with_server do
|
120
|
+
full_result = Rika.parse(txt_url)
|
121
|
+
limited_result = Rika.parse(txt_url, max_content_length: 10)
|
122
|
+
|
123
|
+
aggregate_failures do
|
124
|
+
expect(limited_result.content.length).to be <= 10
|
125
|
+
expect(full_result.content.length).to be > limited_result.content.length
|
126
|
+
expect(full_result.content).to start_with(limited_result.content)
|
127
|
+
end
|
128
|
+
end
|
129
|
+
end
|
130
|
+
|
131
|
+
it 'correctly limits content of large files' do
|
132
|
+
with_server do
|
133
|
+
# Test with varying content length limits
|
134
|
+
result_100 = Rika.parse(large_url, max_content_length: 100)
|
135
|
+
result_1000 = Rika.parse(large_url, max_content_length: 1000)
|
136
|
+
|
137
|
+
aggregate_failures do
|
138
|
+
# Verify content lengths
|
139
|
+
expect(result_100.content.length).to be <= 100
|
140
|
+
expect(result_1000.content.length).to be <= 1000
|
141
|
+
expect(result_1000.content.length).to be > result_100.content.length
|
142
|
+
|
143
|
+
# First part of content should be the same
|
144
|
+
expect(result_1000.content).to start_with(result_100.content)
|
145
|
+
end
|
146
|
+
end
|
147
|
+
end
|
148
|
+
end
|
149
|
+
|
150
|
+
context 'with HTTP redirects' do
|
151
|
+
it 'follows HTTP redirects correctly' do
|
152
|
+
with_server do
|
153
|
+
result = Rika.parse(redirecting_url)
|
154
|
+
|
155
|
+
aggregate_failures do
|
156
|
+
# Should follow redirect to document.txt
|
157
|
+
expect(result.content).to include('Stopping by Woods on a Snowy Evening')
|
158
|
+
expect(result.metadata).to include('Content-Type')
|
159
|
+
end
|
160
|
+
end
|
161
|
+
end
|
162
|
+
end
|
163
|
+
|
164
|
+
context 'with HTTP errors' do
|
165
|
+
it 'handles 404 Not Found errors gracefully' do
|
166
|
+
with_server do
|
167
|
+
# raise_error yields the raised exception to its block so the message can be inspected
|
168
|
+
expect { Rika.parse(not_found_url) }.to raise_error do |error|
|
169
|
+
# Just check if the error contains the URL that caused the error
|
170
|
+
expect(error.message).to include(not_found_url)
|
171
|
+
end
|
172
|
+
end
|
173
|
+
end
|
174
|
+
|
175
|
+
it 'handles 500 Server Error errors gracefully' do
|
176
|
+
with_server do
|
177
|
+
# Expect an error to be raised
|
178
|
+
expect { Rika.parse(server_error_url) }.to raise_error(Java::JavaIo::IOException, /500|Server Error/)
|
179
|
+
end
|
180
|
+
end
|
181
|
+
end
|
182
|
+
|
183
|
+
context 'with unavailable servers' do
|
184
|
+
it 'handles unavailable servers gracefully' do
|
185
|
+
unavailable_server = 'http://non-existent-server-12345.example.com'
|
186
|
+
unavailable_file = "#{unavailable_server}/document.pdf"
|
187
|
+
|
188
|
+
expect { Rika.parse(unavailable_file) }.to raise_error(Java::JavaNet::UnknownHostException)
|
189
|
+
end
|
190
|
+
end
|
191
|
+
|
192
|
+
context 'with mixed input sources' do
|
193
|
+
let(:local_file) { fixture_path('document.txt') }
|
194
|
+
|
195
|
+
it 'can process local files and URLs in the same session' do
|
196
|
+
with_server do
|
197
|
+
local_result = Rika.parse(local_file)
|
198
|
+
url_result = Rika.parse(txt_url)
|
199
|
+
|
200
|
+
aggregate_failures do
|
201
|
+
# Local file checks
|
202
|
+
expect(local_result.content).to include('Stopping by Woods on a Snowy Evening')
|
203
|
+
expect(local_result.input_type).to eq(:file)
|
204
|
+
|
205
|
+
# URL checks
|
206
|
+
expect(url_result.content).to include('Stopping by Woods on a Snowy Evening')
|
207
|
+
expect(url_result.input_type).to eq(:http)
|
208
|
+
end
|
209
|
+
end
|
210
|
+
end
|
211
|
+
end
|
212
|
+
|
213
|
+
context 'testing multiple URL formats in sequence' do
|
214
|
+
it 'processes different URL types correctly' do
  with_server do
    # URLs to test, each with the base content type expected in the metadata
    # and a label used to identify the URL in failure output.
    urls = [
      { url: txt_url, expected_type: 'text/plain', name: 'Text document' },
      { url: pdf_url, expected_type: 'application/pdf', name: 'PDF document' },
      { url: docx_url, expected_type: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', name: 'DOCX document' },
      { url: large_url, expected_type: 'text/plain', name: 'Large text file' },
      { url: redirecting_url, expected_type: 'text/plain', name: 'Redirecting URL' }
    ]

    # Parse every URL before verifying any of them, keyed by URL for lookup below.
    results = urls.each_with_object({}) do |url_info, parsed|
      url = url_info[:url]
      parsed[url] = Rika.parse(url, max_content_length: 1000)
    end

    # Verify each result separately so a failure names the URL it belongs to.
    urls.each do |url_info|
      expected_type = url_info[:expected_type]
      result = results[url_info[:url]]

      aggregate_failures "for #{url_info[:name]}" do
        expect(result.metadata).to include('Content-Type')
        expect(result.content).not_to be_empty
        # Only check for the base content type without charset
        expect(result.metadata['Content-Type']).to include(expected_type.split(';').first)
        expect(result.input_type).to eq(:http)
      end
    end
  end
end
|
251
|
+
end
|
252
|
+
end
|
@@ -0,0 +1,136 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'spec_helper'
|
4
|
+
require 'rika/cli/args_parser'
|
5
|
+
|
6
|
+
describe 'ArgsParser Boolean Options' do
|
7
|
+
# Temporarily capture and suppress stdout to prevent debug output during tests
|
8
|
+
around do |example|
  # Replace $stdout with an in-memory buffer while the example runs.
  # The original stream is restored in `ensure` so that it comes back even if
  # example.run is left abnormally (for instance by a SystemExit, which RSpec
  # does not rescue), rather than staying redirected for later examples.
  original_stdout = $stdout
  $stdout = StringIO.new
  begin
    example.run
  ensure
    $stdout = original_stdout
  end
end
|
14
|
+
|
15
|
+
# Define all boolean options with their default values and flag letter
|
16
|
+
BOOLEAN_OPTIONS = [
|
17
|
+
{ key: :metadata, flag: 'm', default: true },
|
18
|
+
{ key: :text, flag: 't', default: true },
|
19
|
+
{ key: :key_sort, flag: 'k', default: true },
|
20
|
+
{ key: :source, flag: 's', default: true },
|
21
|
+
{ key: :as_array, flag: 'a', default: false },
|
22
|
+
].freeze
|
23
|
+
|
24
|
+
# Define formats for all the different ways to specify boolean values
|
25
|
+
# (option name, option args, expected value)
|
26
|
+
POSITIVE_FORMATS = [
|
27
|
+
['flag only', ['-%s'], true],
|
28
|
+
['flag with +', ['-%s+'], true],
|
29
|
+
['flag with "yes"', ['-%s', 'yes'], true],
|
30
|
+
['flag with "true"', ['-%s', 'true'], true],
|
31
|
+
['long form', ['--%F'], true],
|
32
|
+
['long form with =true', ['--%F=true'], true]
|
33
|
+
].freeze
|
34
|
+
|
35
|
+
NEGATIVE_FORMATS = [
|
36
|
+
['flag with -', ['-%s-'], false],
|
37
|
+
['flag with "no"', ['-%s', 'no'], false],
|
38
|
+
['flag with "false"', ['-%s', 'false'], false],
|
39
|
+
['long form with no-', ['--no-%F'], false],
|
40
|
+
['long form with =false', ['--%F=false'], false]
|
41
|
+
].freeze
|
42
|
+
|
43
|
+
# Shared example for testing a boolean option with positive formats
|
44
|
+
shared_examples 'handles positive formats' do |option_key, option_flag, long_name = nil|
|
45
|
+
POSITIVE_FORMATS.each do |desc, format, expected_value|
|
46
|
+
it "correctly sets #{option_key} to #{expected_value} with #{desc}" do
|
47
|
+
# Use the long_name if available for long form options, otherwise use the flag
|
48
|
+
long = long_name || option_flag
|
49
|
+
args = format.map { |f| f.gsub('%s', option_flag).gsub('%F', long) }
|
50
|
+
options, = ArgsParser.call(args)
|
51
|
+
expect(options[option_key]).to eq(expected_value)
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
# Shared example for testing a boolean option with negative formats
|
57
|
+
shared_examples 'handles negative formats' do |option_key, option_flag, long_name = nil|
|
58
|
+
NEGATIVE_FORMATS.each do |desc, format, expected_value|
|
59
|
+
it "correctly sets #{option_key} to #{expected_value} with #{desc}" do
|
60
|
+
# Use the long_name if available for long form options, otherwise use the flag
|
61
|
+
long = long_name || option_flag
|
62
|
+
args = format.map { |f| f.gsub('%s', option_flag).gsub('%F', long) }
|
63
|
+
options, = ArgsParser.call(args)
|
64
|
+
expect(options[option_key]).to eq(expected_value)
|
65
|
+
end
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
69
|
+
# Shared example for testing default values
|
70
|
+
shared_examples 'respects default value' do |option_key, default_value|
|
71
|
+
it "uses default value of #{default_value} when option not specified" do
|
72
|
+
options, = ArgsParser.call([])
|
73
|
+
expect(options[option_key]).to eq(default_value)
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
77
|
+
# Shared example for testing option chaining/overriding
|
78
|
+
shared_examples 'option chaining' do |option_key, option_flag, long_name = nil|
|
79
|
+
it "allows later options to override earlier ones" do
|
80
|
+
# Use long_name for the --no- form if available
|
81
|
+
long = long_name || option_flag
|
82
|
+
first_arg = "--no-#{long}"
|
83
|
+
|
84
|
+
# First set to false, then true - should end up true
|
85
|
+
args = [first_arg, "-#{option_flag}"]
|
86
|
+
options, = ArgsParser.call(args)
|
87
|
+
expect(options[option_key]).to eq(true)
|
88
|
+
|
89
|
+
# First set to true, then false - should end up false
|
90
|
+
args = ["-#{option_flag}", "-#{option_flag}-"]
|
91
|
+
options, = ArgsParser.call(args)
|
92
|
+
expect(options[option_key]).to eq(false)
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
96
|
+
# Run tests for each boolean option
|
97
|
+
BOOLEAN_OPTIONS.each do |option|
|
98
|
+
context "for #{option[:key]} option" do
|
99
|
+
include_examples 'respects default value', option[:key], option[:default]
|
100
|
+
include_examples 'handles positive formats', option[:key], option[:flag], option[:long_name]
|
101
|
+
include_examples 'handles negative formats', option[:key], option[:flag], option[:long_name]
|
102
|
+
include_examples 'option chaining', option[:key], option[:flag], option[:long_name]
|
103
|
+
end
|
104
|
+
end
|
105
|
+
|
106
|
+
# Environment variable tests
|
107
|
+
context "when using RIKA_OPTIONS environment" do
|
108
|
+
before do
|
109
|
+
@original_env = ENV['RIKA_OPTIONS']
|
110
|
+
end
|
111
|
+
|
112
|
+
after do
|
113
|
+
ENV['RIKA_OPTIONS'] = @original_env
|
114
|
+
end
|
115
|
+
|
116
|
+
it "reads options from environment variable" do
|
117
|
+
ENV['RIKA_OPTIONS'] = "-m- -t -k -s- -a"
|
118
|
+
options, = ArgsParser.call([])
|
119
|
+
expect(options[:metadata]).to eq(false)
|
120
|
+
expect(options[:text]).to eq(true)
|
121
|
+
expect(options[:key_sort]).to eq(true)
|
122
|
+
expect(options[:source]).to eq(false)
|
123
|
+
expect(options[:as_array]).to eq(true)
|
124
|
+
end
|
125
|
+
|
126
|
+
it "allows command line to override environment variable" do
|
127
|
+
ENV['RIKA_OPTIONS'] = "-m- -t- -k- -s- -a"
|
128
|
+
options, = ArgsParser.call(["-m", "-t", "-k"])
|
129
|
+
expect(options[:metadata]).to eq(true)
|
130
|
+
expect(options[:text]).to eq(true)
|
131
|
+
expect(options[:key_sort]).to eq(true)
|
132
|
+
expect(options[:source]).to eq(false)
|
133
|
+
expect(options[:as_array]).to eq(true)
|
134
|
+
end
|
135
|
+
end
|
136
|
+
end
|
@@ -0,0 +1,115 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'spec_helper'
|
4
|
+
require 'rika/cli/args_parser'
|
5
|
+
|
6
|
+
describe 'ArgsParser Environment Variable Handling' do
|
7
|
+
# Temporarily capture and suppress stdout to prevent debug output during tests
|
8
|
+
around do |example|
  # Replace $stdout with an in-memory buffer while the example runs.
  # The original stream is restored in `ensure` so that it comes back even if
  # example.run is left abnormally (for instance by a SystemExit, which RSpec
  # does not rescue), rather than staying redirected for later examples.
  original_stdout = $stdout
  $stdout = StringIO.new
  begin
    example.run
  ensure
    $stdout = original_stdout
  end
end
|
14
|
+
|
15
|
+
# Save and restore the original environment variables
|
16
|
+
around do |example|
  # Remember RIKA_OPTIONS as it was before the example and put it back
  # afterwards. The restore is in `ensure` so a value set by one example cannot
  # leak into the next if example.run is left abnormally.
  original_env = ENV['RIKA_OPTIONS']
  begin
    example.run
  ensure
    # Assigning nil deletes the variable, so an originally unset value is restored too.
    ENV['RIKA_OPTIONS'] = original_env
  end
end
|
21
|
+
|
22
|
+
describe 'environment variable processing' do
|
23
|
+
it 'reads simple options from environment' do
|
24
|
+
ENV['RIKA_OPTIONS'] = '-m- -t- -k -s -a'
|
25
|
+
options, = ArgsParser.call([])
|
26
|
+
expect(options[:metadata]).to eq(false)
|
27
|
+
expect(options[:text]).to eq(false)
|
28
|
+
expect(options[:key_sort]).to eq(true)
|
29
|
+
expect(options[:source]).to eq(true)
|
30
|
+
expect(options[:as_array]).to eq(true)
|
31
|
+
end
|
32
|
+
|
33
|
+
it 'allows command line to override environment' do
|
34
|
+
ENV['RIKA_OPTIONS'] = '-m- -t- -k- -s- -a'
|
35
|
+
options, = ArgsParser.call(['-m', '-t', '-k'])
|
36
|
+
expect(options[:metadata]).to eq(true)
|
37
|
+
expect(options[:text]).to eq(true)
|
38
|
+
expect(options[:key_sort]).to eq(true)
|
39
|
+
expect(options[:source]).to eq(false)
|
40
|
+
expect(options[:as_array]).to eq(true)
|
41
|
+
end
|
42
|
+
|
43
|
+
it 'handles quoted values in environment variables' do
|
44
|
+
ENV['RIKA_OPTIONS'] = '-f "JJ" -m "yes" -t "no"'
|
45
|
+
options, = ArgsParser.call([])
|
46
|
+
expect(options[:format]).to eq('JJ')
|
47
|
+
expect(options[:metadata]).to eq(true)
|
48
|
+
expect(options[:text]).to eq(false)
|
49
|
+
end
|
50
|
+
|
51
|
+
it 'handles escaped spaces in environment variables' do
  # NOTE(review): this example previously assigned the backslash-escaped form
  # ('-f\ jj -m\ yes -t\ no') and then immediately overwrote it, so that input
  # was never parsed. The dead assignment is removed; only the plain form
  # below is exercised. TODO: confirm whether escaped spaces are meant to be
  # supported and, if so, add an example that actually parses them.
  ENV['RIKA_OPTIONS'] = '-f jj'
  options, = ArgsParser.call([])

  expect(options[:format]).to eq('jj')
  expect(options[:metadata]).to eq(true) # Default
  expect(options[:text]).to eq(true) # Default
end
|
62
|
+
|
63
|
+
it 'handles complex quoted strings with multiple options' do
|
64
|
+
ENV['RIKA_OPTIONS'] = '"--format=JJ" "--no-metadata" "--text=yes"'
|
65
|
+
options, = ArgsParser.call([])
|
66
|
+
expect(options[:format]).to eq('JJ')
|
67
|
+
expect(options[:metadata]).to eq(false)
|
68
|
+
expect(options[:text]).to eq(true)
|
69
|
+
end
|
70
|
+
|
71
|
+
it 'ignores empty environment variable' do
|
72
|
+
ENV['RIKA_OPTIONS'] = ''
|
73
|
+
options, = ArgsParser.call([])
|
74
|
+
# Should use default values
|
75
|
+
expect(options[:metadata]).to eq(true)
|
76
|
+
expect(options[:text]).to eq(true)
|
77
|
+
expect(options[:key_sort]).to eq(true)
|
78
|
+
end
|
79
|
+
|
80
|
+
it 'handles environment variable with only whitespace' do
|
81
|
+
ENV['RIKA_OPTIONS'] = ' '
|
82
|
+
options, = ArgsParser.call([])
|
83
|
+
# Should use default values
|
84
|
+
expect(options[:metadata]).to eq(true)
|
85
|
+
expect(options[:text]).to eq(true)
|
86
|
+
expect(options[:key_sort]).to eq(true)
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
90
|
+
describe 'interaction with command-line arguments' do
|
91
|
+
it 'correctly combines environment variables and command-line arguments' do
|
92
|
+
ENV['RIKA_OPTIONS'] = '-f JJ -m- -t-'
|
93
|
+
options, = ArgsParser.call(['-k-', '-s-'])
|
94
|
+
expect(options[:format]).to eq('JJ')
|
95
|
+
expect(options[:metadata]).to eq(false)
|
96
|
+
expect(options[:text]).to eq(false)
|
97
|
+
expect(options[:key_sort]).to eq(false)
|
98
|
+
expect(options[:source]).to eq(false)
|
99
|
+
end
|
100
|
+
|
101
|
+
it 'allows environment-set format to be overridden by command line' do
|
102
|
+
ENV['RIKA_OPTIONS'] = '-f JJ'
|
103
|
+
options, = ArgsParser.call(['-f', 'yy'])
|
104
|
+
expect(options[:format]).to eq('yy')
|
105
|
+
end
|
106
|
+
|
107
|
+
it 'processes options in correct order (env vars first, then command line)' do
|
108
|
+
ENV['RIKA_OPTIONS'] = '-m- -t- -k-'
|
109
|
+
options, = ArgsParser.call(['-m', '-t+', '-k'])
|
110
|
+
expect(options[:metadata]).to eq(true)
|
111
|
+
expect(options[:text]).to eq(true)
|
112
|
+
expect(options[:key_sort]).to eq(true)
|
113
|
+
end
|
114
|
+
end
|
115
|
+
end
|