rika 2.1.0-java → 2.2.0-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,193 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'spec_helper'
4
+ require 'rika'
5
+ require 'tempfile'
6
+ require 'fileutils'
7
+
8
describe 'Document Processing Pipeline', type: :integration do
  # Fixture documents exercised by the examples below.
  let(:txt_file) { fixture_path('document.txt') }
  let(:pdf_file) { fixture_path('document.pdf') }
  let(:docx_file) { fixture_path('document.docx') }
  let(:image_file) { fixture_path('image.jpg') }
  let(:large_file) { fixture_path('large.txt') }
  # First line the text, PDF and DOCX fixtures are expected to start with.
  let(:quote_first_line) { 'Stopping by Woods on a Snowy Evening' }

  context 'processing different file formats through complete pipeline' do
    it 'correctly processes a text file from start to finish' do
      parsed = Rika.parse(txt_file)

      aggregate_failures do
        opening_line = parsed.content.strip.split("\n").first
        expect(opening_line).to eq(quote_first_line)
        expect(parsed.metadata).to include('Content-Type')
        expect(parsed.language).to eq('en')
        expect(parsed.input_type).to eq(:file)
        expect(parsed.file?).to be true
      end
    end

    it 'correctly processes a PDF file from start to finish' do
      parsed = Rika.parse(pdf_file)

      aggregate_failures do
        # Extracted PDF text often starts with a newline, hence the extra strip calls.
        opening_line = parsed.content.strip.split("\n").first.strip
        expect(opening_line).to eq(quote_first_line)
        expect(parsed.metadata).to include('Content-Type' => 'application/pdf')
        expect(parsed.metadata).to include('dc:creator' => 'Robert Frost')
        expect(parsed.language).to eq('en')
      end
    end

    it 'correctly processes a DOCX file from start to finish' do
      parsed = Rika.parse(docx_file)

      aggregate_failures do
        opening_line = parsed.content.strip.split("\n").first
        expect(opening_line).to eq(quote_first_line)
        expect(parsed.metadata).to include('Content-Type')
        expect(parsed.language).to eq('en')
      end
    end

    it 'correctly processes an image file from start to finish' do
      parsed = Rika.parse(image_file)

      # An image may carry no text at all, so only the detected type is checked.
      expect(parsed.metadata).to include('Content-Type' => 'image/jpeg')
    end

    it 'correctly processes a large file from start to finish' do
      parsed = Rika.parse(large_file)

      aggregate_failures do
        # Some content must have been extracted, and plenty of it.
        expect(parsed.content).not_to be_empty
        expect(parsed.content.length).to be > 1000
        # Only the presence of the key is checked; its value may vary with the detected encoding.
        expect(parsed.metadata).to include('Content-Type')
      end
    end
  end

  context 'processing files with non-ASCII characters' do
    # Russian-language fixture used to exercise non-ASCII content.
    let(:non_ascii_file) { fixture_path('ru.txt') }
    let(:expected_language) { 'ru' }

    it 'correctly processes and detects language with non-ASCII characters' do
      parsed = Rika.parse(non_ascii_file)

      aggregate_failures do
        expect(parsed.language).to eq(expected_language)
        expect(parsed.content).not_to be_empty
        expect(parsed.metadata).to include('Content-Type')
      end
    end
  end

  context 'processing different parts of the same file' do
    it 'correctly extracts partial content based on max_content_length' do
      # Parse the same file unrestricted and with two different length caps.
      whole = Rika.parse(txt_file)
      first_ten = Rika.parse(txt_file, max_content_length: 10)
      first_fifty = Rika.parse(txt_file, max_content_length: 50)

      aggregate_failures do
        # Each capped result stays within its limit; the full text exceeds the larger cap.
        expect(first_ten.content.length).to be <= 10
        expect(first_fifty.content.length).to be <= 50
        expect(whole.content.length).to be > 50

        # A capped result must be a prefix of the full text.
        expect(whole.content).to start_with(first_ten.content)
        expect(whole.content).to start_with(first_fifty.content)
      end
    end
  end

  context 'metadata consistency across formats' do
    it 'provides consistent metadata fields across different file formats' do
      # Parse everything up front, then verify per file so failures name the offending fixture.
      parsed_by_path = [txt_file, pdf_file, docx_file].map { |path| [path, Rika.parse(path)] }

      parsed_by_path.each do |path, parsed|
        aggregate_failures "for #{File.basename(path)}" do
          expect(parsed.metadata).to include('Content-Type')
          expect(parsed.metadata).to include('rika:language')
          expect(parsed.metadata).to include('rika:data-source')
        end
      end
    end
  end

  context 'memory management with large files' do
    it 'processes a large file with limited content length without memory issues' do
      # Request only a small slice of the large fixture.
      parsed = Rika.parse(large_file, max_content_length: 100)

      aggregate_failures do
        expect(parsed.content.length).to be <= 100
        expect(parsed.metadata).to include('Content-Type')
      end
    end

    it 'processes a large file multiple times without memory leaks' do
      # Basic smoke test: parse the same large file five times with a growing
      # limit (100, 200, ... 500). Completing without an out-of-memory error
      # is the success criterion.
      (1..5).each do |iteration|
        limit = iteration * 100
        parsed = Rika.parse(large_file, max_content_length: limit)

        aggregate_failures "for iteration #{iteration}" do
          expect(parsed.content.length).to be <= limit
          expect(parsed.metadata).to include('Content-Type')
        end
      end
    end
  end

  context 'sequential processing of multiple files' do
    it 'correctly processes multiple files in sequence' do
      # Fixture path => content type expected in the metadata.
      expected_types = {
        txt_file => 'text/plain',
        pdf_file => 'application/pdf',
        docx_file => 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
        large_file => 'text/plain'
      }

      # Parse every file first, keyed by basename for clearer test output.
      parsed_by_name = expected_types.keys.each_with_object({}) do |path, collected|
        collected[File.basename(path)] = Rika.parse(path, max_content_length: 1000)
      end

      # Then verify each result on its own so a failure names the file.
      expected_types.each do |path, expected_type|
        label = File.basename(path)
        parsed = parsed_by_name[label]

        aggregate_failures "for #{label}" do
          expect(parsed.metadata).to include('Content-Type')
          expect(parsed.content).not_to be_empty
          # The reported type may carry encoding information, so match on the base type only.
          expect(parsed.metadata['Content-Type']).to include(expected_type.split(';').first)
        end
      end
    end
  end
end
@@ -0,0 +1,252 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'spec_helper'
4
+ require 'rika'
5
+ require 'webrick'
6
+ require 'net/http'
7
+ require 'stringio'
8
+
9
describe 'Web URL Processing', type: :integration do
  # Settings for the local HTTP server used by these examples, and the URLs it serves.
  let(:port) { 50515 }
  let(:fixtures_dir) { File.expand_path(File.join(File.dirname(__FILE__), '../fixtures')) }
  let(:url_base) { "http://#{Socket.gethostname}:#{port}" }
  let(:txt_url) { "#{url_base}/document.txt" }
  let(:pdf_url) { "#{url_base}/document.pdf" }
  let(:docx_url) { "#{url_base}/document.docx" }
  let(:large_url) { "#{url_base}/large.txt" } # This would be a large text file in fixtures
  let(:redirecting_url) { "#{url_base}/redirect" }
  let(:not_found_url) { "#{url_base}/not_found.txt" }
  let(:server_error_url) { "#{url_base}/server_error" }

  # Runs the given block while a WEBrick server serves +fixtures_dir+ on +port+.
  #
  # Besides the static fixtures, the server answers:
  #   /redirect       with a 302 pointing at document.txt
  #   /server_error   with a 500
  #   /not_found.txt  with a 404
  #
  # The server runs in a background thread. The caller blocks until WEBrick
  # reports that it has started, or until startup fails, in which case the
  # startup error is raised here instead of waiting forever. The server is
  # always shut down when the block finishes.
  #
  # @yield the code to run while the server is up
  # @raise [StandardError] whatever prevented the server from starting
  def with_server
    startup = Queue.new
    server = nil

    server_thread = Thread.new do
      # Startup failures are handed to the calling thread through +startup+ and
      # later failures surface through +join+, so the per-thread report is noise.
      Thread.current.report_on_exception = false

      begin
        server = WEBrick::HTTPServer.new(
          Port: port,
          DocumentRoot: fixtures_dir,
          AccessLog: [],
          Logger: WEBrick::Log.new(File::NULL),
          # Invoked by WEBrick once the server is running.
          StartCallback: proc { startup << :started }
        )

        # Redirect handler
        server.mount_proc('/redirect') do |_req, res|
          res.status = 302
          res['Location'] = "#{url_base}/document.txt"
        end

        # Server error handler
        server.mount_proc('/server_error') do |_req, res|
          res.status = 500
          res.body = 'Internal Server Error'
        end

        # 404 handler
        server.mount_proc('/not_found.txt') do |_req, res|
          res.status = 404
          res.body = 'Not Found'
        end

        server.start
      rescue StandardError => e
        # Wake the waiting caller with the failure, then let the thread die with it.
        startup << e
        raise
      end
    end

    # Block until the server is running or has failed to come up.
    signal = startup.pop
    raise signal if signal.is_a?(Exception)

    begin
      yield
    ensure
      server.shutdown
      server_thread.join(5) # Give it 5 seconds to shut down
    end
  end

  context 'with valid URLs' do
    it 'successfully retrieves and processes text content from a URL' do
      with_server do
        result = Rika.parse(txt_url)

        aggregate_failures do
          expect(result.content).to include('Stopping by Woods on a Snowy Evening')
          expect(result.metadata).to include('Content-Type')
          expect(result.input_type).to eq(:http)
          expect(result.http?).to be true
          expect(result.file?).to be false
        end
      end
    end

    it 'successfully retrieves and processes PDF content from a URL' do
      with_server do
        result = Rika.parse(pdf_url)

        aggregate_failures do
          expect(result.content).to include('Stopping by Woods on a Snowy Evening')
          expect(result.metadata).to include('Content-Type')
          expect(result.metadata).to include('dc:creator' => 'Robert Frost')
        end
      end
    end

    it 'successfully retrieves and processes DOCX content from a URL' do
      with_server do
        result = Rika.parse(docx_url)

        aggregate_failures do
          expect(result.content).to include('Stopping by Woods on a Snowy Evening')
          expect(result.metadata['Content-Type']).to include('application/vnd.openxmlformats-officedocument.wordprocessingml.document')
        end
      end
    end

    it 'successfully processes large files from a URL' do
      with_server do
        result = Rika.parse(large_url)

        aggregate_failures do
          # Check that content was extracted
          expect(result.content).not_to be_empty
          expect(result.content.length).to be > 1000 # Should be a large amount of content
          expect(result.metadata).to include('Content-Type')
        end
      end
    end
  end

  context 'with content size limitations' do
    it 'respects max_content_length when retrieving from URL' do
      with_server do
        full_result = Rika.parse(txt_url)
        limited_result = Rika.parse(txt_url, max_content_length: 10)

        aggregate_failures do
          expect(limited_result.content.length).to be <= 10
          expect(full_result.content.length).to be > limited_result.content.length
          expect(full_result.content).to start_with(limited_result.content)
        end
      end
    end

    it 'correctly limits content of large files' do
      with_server do
        # Test with varying content length limits
        result_100 = Rika.parse(large_url, max_content_length: 100)
        result_1000 = Rika.parse(large_url, max_content_length: 1000)

        aggregate_failures do
          # Verify content lengths
          expect(result_100.content.length).to be <= 100
          expect(result_1000.content.length).to be <= 1000
          expect(result_1000.content.length).to be > result_100.content.length

          # First part of content should be the same
          expect(result_1000.content).to start_with(result_100.content)
        end
      end
    end
  end

  context 'with HTTP redirects' do
    it 'follows HTTP redirects correctly' do
      with_server do
        result = Rika.parse(redirecting_url)

        aggregate_failures do
          # Should follow redirect to document.txt
          expect(result.content).to include('Stopping by Woods on a Snowy Evening')
          expect(result.metadata).to include('Content-Type')
        end
      end
    end
  end

  context 'with HTTP errors' do
    it 'handles 404 Not Found errors gracefully' do
      with_server do
        # Any error class is accepted; the block form inspects the raised error.
        expect { Rika.parse(not_found_url) }.to raise_error do |error|
          # Just check that the message names the URL that caused the error
          expect(error.message).to include(not_found_url)
        end
      end
    end

    it 'handles 500 Server Error errors gracefully' do
      with_server do
        # Expect an error to be raised
        expect { Rika.parse(server_error_url) }.to raise_error(Java::JavaIo::IOException, /500|Server Error/)
      end
    end
  end

  context 'with unavailable servers' do
    it 'handles unavailable servers gracefully' do
      unavailable_server = 'http://non-existent-server-12345.example.com'
      unavailable_file = "#{unavailable_server}/document.pdf"

      expect { Rika.parse(unavailable_file) }.to raise_error(Java::JavaNet::UnknownHostException)
    end
  end

  context 'with mixed input sources' do
    let(:local_file) { fixture_path('document.txt') }

    it 'can process local files and URLs in the same session' do
      with_server do
        local_result = Rika.parse(local_file)
        url_result = Rika.parse(txt_url)

        aggregate_failures do
          # Local file checks
          expect(local_result.content).to include('Stopping by Woods on a Snowy Evening')
          expect(local_result.input_type).to eq(:file)

          # URL checks
          expect(url_result.content).to include('Stopping by Woods on a Snowy Evening')
          expect(url_result.input_type).to eq(:http)
        end
      end
    end
  end

  context 'testing multiple URL formats in sequence' do
    it 'processes different URL types correctly' do
      with_server do
        # URLs to test, with the content type each is expected to report
        urls = [
          { url: txt_url, expected_type: 'text/plain', name: 'Text document' },
          { url: pdf_url, expected_type: 'application/pdf', name: 'PDF document' },
          { url: docx_url, expected_type: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', name: 'DOCX document' },
          { url: large_url, expected_type: 'text/plain', name: 'Large text file' },
          { url: redirecting_url, expected_type: 'text/plain', name: 'Redirecting URL' }
        ]

        # Process every URL first, storing each result for later verification
        results = {}
        urls.each do |url_info|
          url = url_info[:url]
          results[url] = Rika.parse(url, max_content_length: 1000)
        end

        # Verify each result separately so a failure names the URL kind
        urls.each do |url_info|
          url = url_info[:url]
          expected_type = url_info[:expected_type]
          name = url_info[:name]
          result = results[url]

          aggregate_failures "for #{name}" do
            expect(result.metadata).to include('Content-Type')
            expect(result.content).not_to be_empty
            # Only check for the base content type without charset
            expect(result.metadata['Content-Type']).to include(expected_type.split(';').first)
            expect(result.input_type).to eq(:http)
          end
        end
      end
    end
  end
end
@@ -0,0 +1,136 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'spec_helper'
4
+ require 'rika/cli/args_parser'
5
+
6
describe 'ArgsParser Boolean Options' do
  # Temporarily capture and suppress stdout to prevent debug output during tests.
  # The restore sits in an ensure clause so $stdout is put back even if
  # example.run lets an exception through.
  around do |example|
    original_stdout = $stdout
    $stdout = StringIO.new
    begin
      example.run
    ensure
      $stdout = original_stdout
    end
  end

  # All boolean options with their option key, single-letter flag and default value.
  # No entry defines :long_name, so the long-form formats below fall back to the
  # flag letter (e.g. "--m", "--no-m").
  BOOLEAN_OPTIONS = [
    { key: :metadata, flag: 'm', default: true },
    { key: :text, flag: 't', default: true },
    { key: :key_sort, flag: 'k', default: true },
    { key: :source, flag: 's', default: true },
    { key: :as_array, flag: 'a', default: false },
  ].freeze

  # The different ways to specify a boolean value, as
  # [description, argument templates, expected value].
  # In a template, %s stands for the flag letter and %F for the long name.
  POSITIVE_FORMATS = [
    ['flag only', ['-%s'], true],
    ['flag with +', ['-%s+'], true],
    ['flag with "yes"', ['-%s', 'yes'], true],
    ['flag with "true"', ['-%s', 'true'], true],
    ['long form', ['--%F'], true],
    ['long form with =true', ['--%F=true'], true]
  ].freeze

  NEGATIVE_FORMATS = [
    ['flag with -', ['-%s-'], false],
    ['flag with "no"', ['-%s', 'no'], false],
    ['flag with "false"', ['-%s', 'false'], false],
    ['long form with no-', ['--no-%F'], false],
    ['long form with =false', ['--%F=false'], false]
  ].freeze

  # Fills the %s / %F placeholders of an argument template list.
  #
  # @param format [Array<String>] argument templates from POSITIVE_FORMATS / NEGATIVE_FORMATS
  # @param option_flag [String] single-letter flag substituted for %s
  # @param long_name [String, nil] name substituted for %F; the flag is used when nil
  # @return [Array<String>] the concrete command-line arguments
  def expand_format(format, option_flag, long_name)
    long = long_name || option_flag
    format.map { |template| template.gsub('%s', option_flag).gsub('%F', long) }
  end

  # Shared example for testing a boolean option with positive formats
  shared_examples 'handles positive formats' do |option_key, option_flag, long_name = nil|
    POSITIVE_FORMATS.each do |desc, format, expected_value|
      it "correctly sets #{option_key} to #{expected_value} with #{desc}" do
        options, = ArgsParser.call(expand_format(format, option_flag, long_name))
        expect(options[option_key]).to eq(expected_value)
      end
    end
  end

  # Shared example for testing a boolean option with negative formats
  shared_examples 'handles negative formats' do |option_key, option_flag, long_name = nil|
    NEGATIVE_FORMATS.each do |desc, format, expected_value|
      it "correctly sets #{option_key} to #{expected_value} with #{desc}" do
        options, = ArgsParser.call(expand_format(format, option_flag, long_name))
        expect(options[option_key]).to eq(expected_value)
      end
    end
  end

  # Shared example for testing default values
  shared_examples 'respects default value' do |option_key, default_value|
    it "uses default value of #{default_value} when option not specified" do
      options, = ArgsParser.call([])
      expect(options[option_key]).to eq(default_value)
    end
  end

  # Shared example for testing option chaining/overriding
  shared_examples 'option chaining' do |option_key, option_flag, long_name = nil|
    it "allows later options to override earlier ones" do
      # Use long_name for the --no- form if available
      long = long_name || option_flag
      first_arg = "--no-#{long}"

      # First set to false, then true - should end up true
      args = [first_arg, "-#{option_flag}"]
      options, = ArgsParser.call(args)
      expect(options[option_key]).to eq(true)

      # First set to true, then false - should end up false
      args = ["-#{option_flag}", "-#{option_flag}-"]
      options, = ArgsParser.call(args)
      expect(options[option_key]).to eq(false)
    end
  end

  # Run tests for each boolean option
  BOOLEAN_OPTIONS.each do |option|
    context "for #{option[:key]} option" do
      include_examples 'respects default value', option[:key], option[:default]
      include_examples 'handles positive formats', option[:key], option[:flag], option[:long_name]
      include_examples 'handles negative formats', option[:key], option[:flag], option[:long_name]
      include_examples 'option chaining', option[:key], option[:flag], option[:long_name]
    end
  end

  # Environment variable tests
  context "when using RIKA_OPTIONS environment" do
    # Save RIKA_OPTIONS before each example and restore it afterwards.
    # Assigning nil to an ENV key removes it, so an originally unset variable
    # ends up unset again.
    around do |example|
      original_env = ENV['RIKA_OPTIONS']
      begin
        example.run
      ensure
        ENV['RIKA_OPTIONS'] = original_env
      end
    end

    it "reads options from environment variable" do
      ENV['RIKA_OPTIONS'] = "-m- -t -k -s- -a"
      options, = ArgsParser.call([])
      expect(options[:metadata]).to eq(false)
      expect(options[:text]).to eq(true)
      expect(options[:key_sort]).to eq(true)
      expect(options[:source]).to eq(false)
      expect(options[:as_array]).to eq(true)
    end

    it "allows command line to override environment variable" do
      ENV['RIKA_OPTIONS'] = "-m- -t- -k- -s- -a"
      options, = ArgsParser.call(["-m", "-t", "-k"])
      expect(options[:metadata]).to eq(true)
      expect(options[:text]).to eq(true)
      expect(options[:key_sort]).to eq(true)
      expect(options[:source]).to eq(false)
      expect(options[:as_array]).to eq(true)
    end
  end
end
@@ -0,0 +1,115 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'spec_helper'
4
+ require 'rika/cli/args_parser'
5
+
6
describe 'ArgsParser Environment Variable Handling' do
  # Temporarily capture and suppress stdout to prevent debug output during tests.
  # The restore sits in an ensure clause so $stdout is put back even if
  # example.run lets an exception through.
  around do |example|
    original_stdout = $stdout
    $stdout = StringIO.new
    begin
      example.run
    ensure
      $stdout = original_stdout
    end
  end

  # Save and restore the original RIKA_OPTIONS value around every example.
  # Assigning nil to an ENV key removes it, so an originally unset variable
  # ends up unset again.
  around do |example|
    original_env = ENV['RIKA_OPTIONS']
    begin
      example.run
    ensure
      ENV['RIKA_OPTIONS'] = original_env
    end
  end

  describe 'environment variable processing' do
    it 'reads simple options from environment' do
      ENV['RIKA_OPTIONS'] = '-m- -t- -k -s -a'
      options, = ArgsParser.call([])
      expect(options[:metadata]).to eq(false)
      expect(options[:text]).to eq(false)
      expect(options[:key_sort]).to eq(true)
      expect(options[:source]).to eq(true)
      expect(options[:as_array]).to eq(true)
    end

    it 'allows command line to override environment' do
      ENV['RIKA_OPTIONS'] = '-m- -t- -k- -s- -a'
      options, = ArgsParser.call(['-m', '-t', '-k'])
      expect(options[:metadata]).to eq(true)
      expect(options[:text]).to eq(true)
      expect(options[:key_sort]).to eq(true)
      expect(options[:source]).to eq(false)
      expect(options[:as_array]).to eq(true)
    end

    it 'handles quoted values in environment variables' do
      ENV['RIKA_OPTIONS'] = '-f "JJ" -m "yes" -t "no"'
      options, = ArgsParser.call([])
      expect(options[:format]).to eq('JJ')
      expect(options[:metadata]).to eq(true)
      expect(options[:text]).to eq(false)
    end

    it 'handles escaped spaces in environment variables' do
      # NOTE(review): despite its name, this example does not exercise escaped
      # spaces. An earlier assignment of '-f\ jj -m\ yes -t\ no' was overwritten
      # before the parser ran, so only the plain form below was ever tested.
      # TODO: add a real escaped-space case once the expected parse is confirmed.
      ENV['RIKA_OPTIONS'] = '-f jj'
      options, = ArgsParser.call([])

      expect(options[:format]).to eq('jj')
      expect(options[:metadata]).to eq(true) # Default
      expect(options[:text]).to eq(true) # Default
    end

    it 'handles complex quoted strings with multiple options' do
      ENV['RIKA_OPTIONS'] = '"--format=JJ" "--no-metadata" "--text=yes"'
      options, = ArgsParser.call([])
      expect(options[:format]).to eq('JJ')
      expect(options[:metadata]).to eq(false)
      expect(options[:text]).to eq(true)
    end

    it 'ignores empty environment variable' do
      ENV['RIKA_OPTIONS'] = ''
      options, = ArgsParser.call([])
      # Should use default values
      expect(options[:metadata]).to eq(true)
      expect(options[:text]).to eq(true)
      expect(options[:key_sort]).to eq(true)
    end

    it 'handles environment variable with only whitespace' do
      ENV['RIKA_OPTIONS'] = ' '
      options, = ArgsParser.call([])
      # Should use default values
      expect(options[:metadata]).to eq(true)
      expect(options[:text]).to eq(true)
      expect(options[:key_sort]).to eq(true)
    end
  end

  describe 'interaction with command-line arguments' do
    it 'correctly combines environment variables and command-line arguments' do
      ENV['RIKA_OPTIONS'] = '-f JJ -m- -t-'
      options, = ArgsParser.call(['-k-', '-s-'])
      expect(options[:format]).to eq('JJ')
      expect(options[:metadata]).to eq(false)
      expect(options[:text]).to eq(false)
      expect(options[:key_sort]).to eq(false)
      expect(options[:source]).to eq(false)
    end

    it 'allows environment-set format to be overridden by command line' do
      ENV['RIKA_OPTIONS'] = '-f JJ'
      options, = ArgsParser.call(['-f', 'yy'])
      expect(options[:format]).to eq('yy')
    end

    it 'processes options in correct order (env vars first, then command line)' do
      ENV['RIKA_OPTIONS'] = '-m- -t- -k-'
      options, = ArgsParser.call(['-m', '-t+', '-k'])
      expect(options[:metadata]).to eq(true)
      expect(options[:text]).to eq(true)
      expect(options[:key_sort]).to eq(true)
    end
  end
end