rika 2.0.4-java → 2.1.0-java
This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.
- checksums.yaml +4 -4
- data/README.md +23 -0
- data/RELEASE_NOTES.md +4 -0
- data/lib/rika/cli/args_parser.rb +7 -3
- data/lib/rika/version.rb +1 -1
- data/spec/rika/cli/args_parser_spec.rb +39 -0
- metadata +5 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b7c8161615ba58a4b172722ebff7dad84ce2c828fdaa11f93277b99ec0b73916
|
4
|
+
data.tar.gz: bf76b6eebc07d3eba255ac14352f6b6807e842ae1ec2b3389afd027ec85b50c8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3b0fd15c3f94142966fada1c952b40873de2be4d2da064859ad44ccdec2fc44bf0d8637bf05a6543b89b015fb3ca15f15f26f721c7b3d9d6ce33568fac6f5868
|
7
|
+
data.tar.gz: 1055d3c63959dbcbf639aff24edc0040b5a2e1c1d4f834301912ffdc0010177be633f0567664b725bcaa916babb498f8123054a438718d06446c097b252f3294
|
data/README.md
CHANGED
@@ -144,6 +144,29 @@ If you find yourself using the same options over and over again, you can put the
|
|
144
144
|
variable. For example, if the default behavior of sorting keys does not work for your language, you can disable it
|
145
145
|
for all invocations of the `rika` command by specifying `-k-` in the RIKA_OPTIONS environment variable.
|
146
146
|
|
147
|
+
### Using Wildcards for File Specification
|
148
|
+
|
149
|
+
Rika now supports in-app expansion of wildcard patterns for file specification. This means you can quote wildcard patterns
|
150
|
+
to prevent the shell from expanding them, and Rika will handle the expansion internally:
|
151
|
+
|
152
|
+
```bash
|
153
|
+
# Let Rika handle the expansion (no practical limit on number of files)
|
154
|
+
rika '**/*.pdf'
|
155
|
+
|
156
|
+
# Shell expands wildcards (limited by shell's maximum argument length)
|
157
|
+
rika **/*.pdf
|
158
|
+
```
|
159
|
+
|
160
|
+
This is particularly useful when dealing with large numbers of files, as shell expansion may hit command line length limits.
|
161
|
+
In-app expansion has no practical limit on the number of files that can be processed.
|
162
|
+
|
163
|
+
Supported wildcard patterns:
|
164
|
+
- `*` - Match any number of characters
|
165
|
+
- `?` - Match a single character
|
166
|
+
- `[abc]` - Match one character from the set
|
167
|
+
- `{a,b,c}` - Match any of the patterns a, b, or c
|
168
|
+
- `**` - Recursive directory matching (match all files in all subdirectories)
|
169
|
+
|
147
170
|
### Machine Readable Data Support
|
148
171
|
|
149
172
|
If both metadata and text are output, and the same output format is used for both, and that format is JSON
|
data/RELEASE_NOTES.md
CHANGED
data/lib/rika/cli/args_parser.rb
CHANGED
@@ -94,10 +94,14 @@ class ArgsParser
|
|
94
94
|
end
|
95
95
|
|
96
96
|
# @return [Array] the targets specified on the command line, possibly expanded by the shell,
|
97
|
-
# and with any directories removed.
|
97
|
+
# and with any directories removed. If a target contains a wildcard pattern (*, ?, []),
|
98
|
+
# it will be expanded using Dir.glob within Ruby, which has no practical limit on the number of files.
|
98
99
|
private def create_target_array
|
99
|
-
|
100
|
-
|
100
|
+
args.each_with_object([]) do |arg, result|
|
101
|
+
# Expand any potential globs and reject directories
|
102
|
+
files = Dir.glob(arg).reject { |file| File.directory?(file) }
|
103
|
+
result.concat(files)
|
104
|
+
end.map(&:freeze).freeze
|
101
105
|
end
|
102
106
|
|
103
107
|
# Fills in the second format option character if absent, and removes any excess characters
|
data/lib/rika/version.rb
CHANGED
data/spec/rika/cli/args_parser_spec.rb
CHANGED
@@ -6,6 +6,7 @@ require 'rika/cli/args_parser'
|
|
6
6
|
|
7
7
|
describe ArgsParser do
|
8
8
|
let(:versions_regex) { /Versions:.*Rika: (\d+\.\d+\.\d+(-\w+)?).*Tika: (\d+\.\d+\.\d+(-\w+)?)/ }
|
9
|
+
let(:fixtures_dir) { File.expand_path(File.join(File.dirname(__FILE__), '../../fixtures')) }
|
9
10
|
|
10
11
|
specify 'returns a hash of options, a target array, and help text' do
|
11
12
|
options, targets, help_text = described_class.call([])
|
@@ -114,4 +115,42 @@ describe ArgsParser do
|
|
114
115
|
expect(described_class::DEFAULT_OPTIONS).to be_frozen
|
115
116
|
end
|
116
117
|
end
|
118
|
+
|
119
|
+
describe '#create_target_array' do
|
120
|
+
let(:args_parser) { described_class.new }
|
121
|
+
|
122
|
+
it 'removes directories from the target array' do
|
123
|
+
allow(args_parser).to receive(:args).and_return([fixtures_dir])
|
124
|
+
expect(args_parser.send(:create_target_array)).to be_empty
|
125
|
+
end
|
126
|
+
|
127
|
+
it 'keeps regular files in the target array' do
|
128
|
+
tiny_filespec = fixture_path('tiny.txt')
|
129
|
+
allow(args_parser).to receive(:args).and_return([tiny_filespec])
|
130
|
+
expect(args_parser.send(:create_target_array)).to eq([tiny_filespec])
|
131
|
+
end
|
132
|
+
|
133
|
+
context 'with wildcard patterns' do
|
134
|
+
it 'expands wildcard patterns using Dir.glob' do
|
135
|
+
pattern = fixture_path('*.txt')
|
136
|
+
allow(args_parser).to receive(:args).and_return([pattern])
|
137
|
+
|
138
|
+
result = args_parser.send(:create_target_array)
|
139
|
+
# Verify we got at least one .txt file and no directories
|
140
|
+
expect(result).not_to be_empty
|
141
|
+
expect(result.all? { |f| f.end_with?('.txt') }).to be true
|
142
|
+
end
|
143
|
+
|
144
|
+
it 'removes directories from the expanded results' do
|
145
|
+
# Use a pattern that will match both files and the fixtures dir
|
146
|
+
pattern = File.join(fixtures_dir, '*')
|
147
|
+
allow(args_parser).to receive(:args).and_return([pattern])
|
148
|
+
|
149
|
+
result = args_parser.send(:create_target_array)
|
150
|
+
# Verify we got some files but no directories
|
151
|
+
expect(result).not_to be_empty
|
152
|
+
expect(result.any? { |f| File.directory?(f) }).to be false
|
153
|
+
end
|
154
|
+
end
|
155
|
+
end
|
117
156
|
end
|
metadata
CHANGED
@@ -1,17 +1,17 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rika
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.4
|
4
|
+
version: 2.1.0
|
5
5
|
platform: java
|
6
6
|
authors:
|
7
7
|
- Richard Nyström
|
8
8
|
- Keith Bennett
|
9
|
-
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date: 2025-
|
11
|
+
date: 2025-04-20 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
14
|
+
name: awesome_print
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
@@ -20,9 +20,8 @@ dependencies:
|
|
20
20
|
- - ">="
|
21
21
|
- !ruby/object:Gem::Version
|
22
22
|
version: 1.9.2
|
23
|
-
name: awesome_print
|
24
|
-
prerelease: false
|
25
23
|
type: :runtime
|
24
|
+
prerelease: false
|
26
25
|
version_requirements: !ruby/object:Gem::Requirement
|
27
26
|
requirements:
|
28
27
|
- - "~>"
|
@@ -112,8 +111,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
112
111
|
- !ruby/object:Gem::Version
|
113
112
|
version: '0'
|
114
113
|
requirements: []
|
115
|
-
rubygems_version: 3.
|
116
|
-
signing_key:
|
114
|
+
rubygems_version: 3.6.3
|
117
115
|
specification_version: 4
|
118
116
|
summary: A JRuby wrapper for Apache Tika to extract text and metadata from files of
|
119
117
|
various formats.
|