kreuzberg 4.0.0.pre.rc.6 → 4.0.0.rc1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (175)
  1. checksums.yaml +4 -4
  2. data/.gitignore +0 -6
  3. data/.rubocop.yaml +534 -1
  4. data/Gemfile +2 -1
  5. data/Gemfile.lock +11 -11
  6. data/README.md +5 -10
  7. data/examples/async_patterns.rb +0 -1
  8. data/ext/kreuzberg_rb/extconf.rb +0 -10
  9. data/ext/kreuzberg_rb/native/Cargo.toml +15 -23
  10. data/ext/kreuzberg_rb/native/build.rs +2 -0
  11. data/ext/kreuzberg_rb/native/include/ieeefp.h +1 -1
  12. data/ext/kreuzberg_rb/native/include/msvc_compat/strings.h +1 -1
  13. data/ext/kreuzberg_rb/native/include/strings.h +2 -2
  14. data/ext/kreuzberg_rb/native/include/unistd.h +1 -1
  15. data/ext/kreuzberg_rb/native/src/lib.rs +16 -75
  16. data/kreuzberg.gemspec +14 -57
  17. data/lib/kreuzberg/cache_api.rb +0 -1
  18. data/lib/kreuzberg/cli.rb +2 -2
  19. data/lib/kreuzberg/config.rb +2 -9
  20. data/lib/kreuzberg/errors.rb +7 -75
  21. data/lib/kreuzberg/extraction_api.rb +0 -1
  22. data/lib/kreuzberg/setup_lib_path.rb +0 -1
  23. data/lib/kreuzberg/version.rb +1 -1
  24. data/lib/kreuzberg.rb +0 -21
  25. data/pkg/kreuzberg-4.0.0.rc1.gem +0 -0
  26. data/sig/kreuzberg.rbs +3 -55
  27. data/spec/binding/cli_proxy_spec.rb +4 -2
  28. data/spec/binding/cli_spec.rb +11 -12
  29. data/spec/examples.txt +104 -0
  30. data/spec/fixtures/config.yaml +1 -0
  31. data/spec/spec_helper.rb +1 -1
  32. data/vendor/kreuzberg/Cargo.toml +42 -112
  33. data/vendor/kreuzberg/README.md +2 -2
  34. data/vendor/kreuzberg/build.rs +4 -18
  35. data/vendor/kreuzberg/src/bin/profile_extract.rs +455 -0
  36. data/vendor/kreuzberg/src/cache/mod.rs +3 -27
  37. data/vendor/kreuzberg/src/core/batch_mode.rs +0 -60
  38. data/vendor/kreuzberg/src/core/extractor.rs +81 -202
  39. data/vendor/kreuzberg/src/core/io.rs +2 -4
  40. data/vendor/kreuzberg/src/core/mime.rs +12 -2
  41. data/vendor/kreuzberg/src/core/mod.rs +1 -4
  42. data/vendor/kreuzberg/src/core/pipeline.rs +33 -111
  43. data/vendor/kreuzberg/src/embeddings.rs +16 -125
  44. data/vendor/kreuzberg/src/error.rs +1 -1
  45. data/vendor/kreuzberg/src/extraction/docx.rs +1 -1
  46. data/vendor/kreuzberg/src/extraction/image.rs +13 -13
  47. data/vendor/kreuzberg/src/extraction/libreoffice.rs +1 -0
  48. data/vendor/kreuzberg/src/extraction/mod.rs +5 -9
  49. data/vendor/kreuzberg/src/extraction/office_metadata/mod.rs +0 -2
  50. data/vendor/kreuzberg/src/extraction/pandoc/batch.rs +275 -0
  51. data/vendor/kreuzberg/src/extraction/pandoc/mime_types.rs +178 -0
  52. data/vendor/kreuzberg/src/extraction/pandoc/mod.rs +491 -0
  53. data/vendor/kreuzberg/src/extraction/pandoc/server.rs +496 -0
  54. data/vendor/kreuzberg/src/extraction/pandoc/subprocess.rs +1188 -0
  55. data/vendor/kreuzberg/src/extraction/pandoc/version.rs +162 -0
  56. data/vendor/kreuzberg/src/extractors/archive.rs +0 -21
  57. data/vendor/kreuzberg/src/extractors/docx.rs +128 -16
  58. data/vendor/kreuzberg/src/extractors/email.rs +0 -14
  59. data/vendor/kreuzberg/src/extractors/excel.rs +20 -19
  60. data/vendor/kreuzberg/src/extractors/html.rs +154 -137
  61. data/vendor/kreuzberg/src/extractors/image.rs +4 -7
  62. data/vendor/kreuzberg/src/extractors/mod.rs +9 -106
  63. data/vendor/kreuzberg/src/extractors/pandoc.rs +201 -0
  64. data/vendor/kreuzberg/src/extractors/pdf.rs +15 -12
  65. data/vendor/kreuzberg/src/extractors/pptx.rs +3 -17
  66. data/vendor/kreuzberg/src/extractors/structured.rs +0 -14
  67. data/vendor/kreuzberg/src/extractors/text.rs +5 -23
  68. data/vendor/kreuzberg/src/extractors/xml.rs +0 -7
  69. data/vendor/kreuzberg/src/keywords/rake.rs +1 -0
  70. data/vendor/kreuzberg/src/lib.rs +1 -4
  71. data/vendor/kreuzberg/src/mcp/mod.rs +1 -1
  72. data/vendor/kreuzberg/src/mcp/server.rs +3 -5
  73. data/vendor/kreuzberg/src/ocr/processor.rs +2 -18
  74. data/vendor/kreuzberg/src/pdf/error.rs +1 -1
  75. data/vendor/kreuzberg/src/pdf/table.rs +44 -17
  76. data/vendor/kreuzberg/src/pdf/text.rs +3 -0
  77. data/vendor/kreuzberg/src/plugins/extractor.rs +5 -8
  78. data/vendor/kreuzberg/src/plugins/ocr.rs +11 -2
  79. data/vendor/kreuzberg/src/plugins/processor.rs +1 -2
  80. data/vendor/kreuzberg/src/plugins/registry.rs +0 -13
  81. data/vendor/kreuzberg/src/plugins/validator.rs +8 -9
  82. data/vendor/kreuzberg/src/stopwords/mod.rs +2 -2
  83. data/vendor/kreuzberg/src/types.rs +12 -42
  84. data/vendor/kreuzberg/tests/batch_orchestration.rs +5 -19
  85. data/vendor/kreuzberg/tests/batch_processing.rs +3 -15
  86. data/vendor/kreuzberg/tests/chunking_offset_demo.rs +92 -0
  87. data/vendor/kreuzberg/tests/concurrency_stress.rs +1 -17
  88. data/vendor/kreuzberg/tests/config_features.rs +0 -18
  89. data/vendor/kreuzberg/tests/config_loading_tests.rs +39 -15
  90. data/vendor/kreuzberg/tests/core_integration.rs +7 -24
  91. data/vendor/kreuzberg/tests/csv_integration.rs +81 -71
  92. data/vendor/kreuzberg/tests/docx_metadata_extraction_test.rs +25 -23
  93. data/vendor/kreuzberg/tests/pandoc_integration.rs +503 -0
  94. data/vendor/kreuzberg/tests/pipeline_integration.rs +1 -0
  95. data/vendor/kreuzberg/tests/plugin_postprocessor_test.rs +1 -0
  96. data/vendor/kreuzberg/tests/registry_integration_tests.rs +22 -1
  97. data/vendor/kreuzberg/tests/security_validation.rs +1 -12
  98. metadata +25 -90
  99. data/.rubocop.yml +0 -538
  100. data/ext/kreuzberg_rb/native/Cargo.lock +0 -6535
  101. data/lib/kreuzberg/error_context.rb +0 -32
  102. data/vendor/kreuzberg/benches/otel_overhead.rs +0 -48
  103. data/vendor/kreuzberg/src/extraction/markdown.rs +0 -213
  104. data/vendor/kreuzberg/src/extraction/office_metadata/odt_properties.rs +0 -287
  105. data/vendor/kreuzberg/src/extractors/bibtex.rs +0 -469
  106. data/vendor/kreuzberg/src/extractors/docbook.rs +0 -502
  107. data/vendor/kreuzberg/src/extractors/epub.rs +0 -707
  108. data/vendor/kreuzberg/src/extractors/fictionbook.rs +0 -491
  109. data/vendor/kreuzberg/src/extractors/fictionbook.rs.backup2 +0 -738
  110. data/vendor/kreuzberg/src/extractors/jats.rs +0 -1051
  111. data/vendor/kreuzberg/src/extractors/jupyter.rs +0 -367
  112. data/vendor/kreuzberg/src/extractors/latex.rs +0 -652
  113. data/vendor/kreuzberg/src/extractors/markdown.rs +0 -700
  114. data/vendor/kreuzberg/src/extractors/odt.rs +0 -628
  115. data/vendor/kreuzberg/src/extractors/opml.rs +0 -634
  116. data/vendor/kreuzberg/src/extractors/orgmode.rs +0 -528
  117. data/vendor/kreuzberg/src/extractors/rst.rs +0 -576
  118. data/vendor/kreuzberg/src/extractors/rtf.rs +0 -810
  119. data/vendor/kreuzberg/src/extractors/security.rs +0 -484
  120. data/vendor/kreuzberg/src/extractors/security_tests.rs +0 -367
  121. data/vendor/kreuzberg/src/extractors/typst.rs +0 -650
  122. data/vendor/kreuzberg/src/panic_context.rs +0 -154
  123. data/vendor/kreuzberg/tests/api_extract_multipart.rs +0 -52
  124. data/vendor/kreuzberg/tests/bibtex_parity_test.rs +0 -421
  125. data/vendor/kreuzberg/tests/docbook_extractor_tests.rs +0 -498
  126. data/vendor/kreuzberg/tests/docx_vs_pandoc_comparison.rs +0 -370
  127. data/vendor/kreuzberg/tests/epub_native_extractor_tests.rs +0 -275
  128. data/vendor/kreuzberg/tests/fictionbook_extractor_tests.rs +0 -228
  129. data/vendor/kreuzberg/tests/html_table_test.rs +0 -551
  130. data/vendor/kreuzberg/tests/instrumentation_test.rs +0 -139
  131. data/vendor/kreuzberg/tests/jats_extractor_tests.rs +0 -639
  132. data/vendor/kreuzberg/tests/jupyter_extractor_tests.rs +0 -704
  133. data/vendor/kreuzberg/tests/latex_extractor_tests.rs +0 -496
  134. data/vendor/kreuzberg/tests/markdown_extractor_tests.rs +0 -490
  135. data/vendor/kreuzberg/tests/odt_extractor_tests.rs +0 -695
  136. data/vendor/kreuzberg/tests/opml_extractor_tests.rs +0 -616
  137. data/vendor/kreuzberg/tests/orgmode_extractor_tests.rs +0 -822
  138. data/vendor/kreuzberg/tests/rst_extractor_tests.rs +0 -692
  139. data/vendor/kreuzberg/tests/rtf_extractor_tests.rs +0 -776
  140. data/vendor/kreuzberg/tests/typst_behavioral_tests.rs +0 -1259
  141. data/vendor/kreuzberg/tests/typst_extractor_tests.rs +0 -647
  142. data/vendor/rb-sys/.cargo-ok +0 -1
  143. data/vendor/rb-sys/.cargo_vcs_info.json +0 -6
  144. data/vendor/rb-sys/Cargo.lock +0 -393
  145. data/vendor/rb-sys/Cargo.toml +0 -70
  146. data/vendor/rb-sys/Cargo.toml.orig +0 -57
  147. data/vendor/rb-sys/LICENSE-APACHE +0 -190
  148. data/vendor/rb-sys/LICENSE-MIT +0 -21
  149. data/vendor/rb-sys/bin/release.sh +0 -21
  150. data/vendor/rb-sys/build/features.rs +0 -108
  151. data/vendor/rb-sys/build/main.rs +0 -246
  152. data/vendor/rb-sys/build/stable_api_config.rs +0 -153
  153. data/vendor/rb-sys/build/version.rs +0 -48
  154. data/vendor/rb-sys/readme.md +0 -36
  155. data/vendor/rb-sys/src/bindings.rs +0 -21
  156. data/vendor/rb-sys/src/hidden.rs +0 -11
  157. data/vendor/rb-sys/src/lib.rs +0 -34
  158. data/vendor/rb-sys/src/macros.rs +0 -371
  159. data/vendor/rb-sys/src/memory.rs +0 -53
  160. data/vendor/rb-sys/src/ruby_abi_version.rs +0 -38
  161. data/vendor/rb-sys/src/special_consts.rs +0 -31
  162. data/vendor/rb-sys/src/stable_api/compiled.c +0 -179
  163. data/vendor/rb-sys/src/stable_api/compiled.rs +0 -257
  164. data/vendor/rb-sys/src/stable_api/ruby_2_6.rs +0 -316
  165. data/vendor/rb-sys/src/stable_api/ruby_2_7.rs +0 -316
  166. data/vendor/rb-sys/src/stable_api/ruby_3_0.rs +0 -324
  167. data/vendor/rb-sys/src/stable_api/ruby_3_1.rs +0 -317
  168. data/vendor/rb-sys/src/stable_api/ruby_3_2.rs +0 -315
  169. data/vendor/rb-sys/src/stable_api/ruby_3_3.rs +0 -326
  170. data/vendor/rb-sys/src/stable_api/ruby_3_4.rs +0 -327
  171. data/vendor/rb-sys/src/stable_api.rs +0 -261
  172. data/vendor/rb-sys/src/symbol.rs +0 -31
  173. data/vendor/rb-sys/src/tracking_allocator.rs +0 -332
  174. data/vendor/rb-sys/src/utils.rs +0 -89
  175. data/vendor/rb-sys/src/value_type.rs +0 -7
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9fc516b53481da9460a558cdfd860a53274d43091d8f750d76250e32059758be
4
- data.tar.gz: 886d27a343998685e302d771e3e517d0ed23620fba52fd9412c0d780f8034580
3
+ metadata.gz: bfb7dc6c3685285b053af795140387ad8292c698cbba4cfda3411390dfe3cd48
4
+ data.tar.gz: 7688722b9a8d99564044a8bca31c6fadf107208aabaa336d450069b636a02726
5
5
  SHA512:
6
- metadata.gz: 22ffa8ab5ec78709a6b09927a21ae5ef08fc8c24cccfe94a14a54f47fa087935fb84c80719bc6a00266210178d453baaa798961fd00b91bbd484d20f4f73ddd8
7
- data.tar.gz: 859ae29e4fc31def124754482f018d022580e7f4ef7a1a7662dab5a57d8a7620c0de77a218c39ff033f30af2272961fd1cc9f6460690553d7a266b9916816828
6
+ metadata.gz: 3c5b66fc2c0457f670c39e53f038a5fa040760ac3f36b8731c35210d2ca98b1cf65ae41411bd6788411f18090ffd269878cf814d4b93b4201098f5e5950782e0
7
+ data.tar.gz: 1a02c5c0ffb830454e20e03e937aa4d80e73d9e6a6ae4913ed9d35d6cdb84ffbb2141711fa3b2ce5fcd821b109613f5ec01cbfdcface5615071727b26c772347
data/.gitignore CHANGED
@@ -6,9 +6,3 @@ lib/*.dll
6
6
  lib/*.dylib
7
7
  lib/*.so
8
8
  lib/*.dll
9
-
10
- # Vendor directory for local development (symlink to ../../crates/kreuzberg)
11
- # In CI, this is replaced by the actual vendored crate files
12
- # Exception: vendor/rb-sys is patched and committed
13
- !vendor/rb-sys/
14
- vendor/
data/.rubocop.yaml CHANGED
@@ -1 +1,534 @@
1
- inherit_from: .rubocop.yml
1
+
2
+ plugins:
3
+ - rubocop-performance
4
+ - rubocop-rspec
5
+
6
+ AllCops:
7
+ TargetRubyVersion: 3.2
8
+ NewCops: enable
9
+ SuggestExtensions: false
10
+ Exclude:
11
+ - 'vendor/**/*'
12
+ - 'tmp/**/*'
13
+ - 'lib/**/*.bundle'
14
+ - 'ext/**/*'
15
+
16
+ Style/StringLiterals:
17
+ Enabled: true
18
+ EnforcedStyle: single_quotes
19
+
20
+ Style/StringLiteralsInInterpolation:
21
+ Enabled: true
22
+ EnforcedStyle: single_quotes
23
+
24
+ Style/FrozenStringLiteralComment:
25
+ Enabled: true
26
+ EnforcedStyle: always
27
+
28
+ Style/Documentation:
29
+ Enabled: false
30
+
31
+ Layout/LineLength:
32
+ Max: 120
33
+ AllowedPatterns:
34
+ - '\A\s*#'
35
+ Exclude:
36
+ - 'spec/**/*'
37
+
38
+ Metrics/BlockLength:
39
+ Max: 350
40
+ Exclude:
41
+ - '*.gemspec'
42
+ - 'examples/**/*'
43
+
44
+ Metrics/MethodLength:
45
+ Max: 18
46
+ Exclude:
47
+ - 'spec/**/*'
48
+ - 'examples/**/*'
49
+
50
+ Metrics/AbcSize:
51
+ Max: 20
52
+ Exclude:
53
+ - 'spec/**/*'
54
+ - 'examples/**/*'
55
+
56
+ Naming/FileName:
57
+ Enabled: true
58
+ Exclude:
59
+ - 'Gemfile'
60
+ - 'Rakefile'
61
+
62
+ RSpec/SpecFilePathFormat:
63
+ Enabled: false
64
+ RSpec/SpecFilePathSuffix:
65
+ Enabled: false
66
+
67
+ RSpec/PendingWithoutReason:
68
+ Enabled: false
69
+
70
+ RSpec/LeakyConstantDeclaration:
71
+ Enabled: false
72
+
73
+ RSpec/UnspecifiedException:
74
+ Enabled: false
75
+
76
+ Lint/ConstantDefinitionInBlock:
77
+ Exclude:
78
+ - 'spec/**/*'
79
+
80
+ RSpec/InstanceVariable:
81
+ Enabled: false
82
+
83
+ Gemspec/DevelopmentDependencies:
84
+ Enabled: false
85
+
86
+ Metrics/ParameterLists:
87
+ Max: 7
88
+ Exclude:
89
+ - 'lib/kreuzberg/config.rb'
90
+
91
+ Metrics/CyclomaticComplexity:
92
+ Max: 7
93
+ Exclude:
94
+ - 'lib/kreuzberg/config.rb'
95
+
96
+ RSpec/RepeatedExampleGroupBody:
97
+ Enabled: false
98
+
99
+ RSpec/DescribeClass:
100
+ Enabled: false
101
+
102
+ RSpec/PredicateMatcher:
103
+ Enabled: false
104
+
105
+ # New cops from RuboCop 1.65+
106
+ Gemspec/AddRuntimeDependency:
107
+ Enabled: true
108
+
109
+ Gemspec/AttributeAssignment:
110
+ Enabled: true
111
+
112
+ Gemspec/DeprecatedAttributeAssignment:
113
+ Enabled: true
114
+
115
+ Gemspec/RequireMFA:
116
+ Enabled: false
117
+
118
+ Layout/EmptyLinesAfterModuleInclusion:
119
+ Enabled: true
120
+
121
+ Layout/LineContinuationLeadingSpace:
122
+ Enabled: true
123
+
124
+ Layout/LineContinuationSpacing:
125
+ Enabled: true
126
+
127
+ Layout/LineEndStringConcatenationIndentation:
128
+ Enabled: true
129
+
130
+ Layout/SpaceBeforeBrackets:
131
+ Enabled: true
132
+
133
+ Lint/AmbiguousAssignment:
134
+ Enabled: true
135
+
136
+ Lint/AmbiguousOperatorPrecedence:
137
+ Enabled: true
138
+
139
+ Lint/AmbiguousRange:
140
+ Enabled: true
141
+
142
+ Lint/ArrayLiteralInRegexp:
143
+ Enabled: true
144
+
145
+ Lint/ConstantOverwrittenInRescue:
146
+ Enabled: true
147
+
148
+ Lint/ConstantReassignment:
149
+ Enabled: true
150
+
151
+ Lint/CopDirectiveSyntax:
152
+ Enabled: true
153
+
154
+ Lint/DeprecatedConstants:
155
+ Enabled: true
156
+
157
+ Lint/DuplicateBranch:
158
+ Enabled: true
159
+
160
+ Lint/DuplicateMagicComment:
161
+ Enabled: true
162
+
163
+ Lint/DuplicateMatchPattern:
164
+ Enabled: true
165
+
166
+ Lint/DuplicateRegexpCharacterClassElement:
167
+ Enabled: true
168
+
169
+ Lint/DuplicateSetElement:
170
+ Enabled: true
171
+
172
+ Lint/EmptyBlock:
173
+ Enabled: true
174
+
175
+ Lint/EmptyClass:
176
+ Enabled: true
177
+
178
+ Lint/EmptyInPattern:
179
+ Enabled: true
180
+
181
+ Lint/HashNewWithKeywordArgumentsAsDefault:
182
+ Enabled: true
183
+
184
+ Lint/IncompatibleIoSelectWithFiberScheduler:
185
+ Enabled: true
186
+
187
+ Lint/ItWithoutArgumentsInBlock:
188
+ Enabled: true
189
+
190
+ Lint/LambdaWithoutLiteralBlock:
191
+ Enabled: true
192
+
193
+ Lint/LiteralAssignmentInCondition:
194
+ Enabled: true
195
+
196
+ Lint/MixedCaseRange:
197
+ Enabled: true
198
+
199
+ Lint/NoReturnInBeginEndBlocks:
200
+ Enabled: true
201
+
202
+ Lint/NonAtomicFileOperation:
203
+ Enabled: true
204
+
205
+ Lint/NumberedParameterAssignment:
206
+ Enabled: true
207
+
208
+ Lint/NumericOperationWithConstantResult:
209
+ Enabled: true
210
+
211
+ Lint/OrAssignmentToConstant:
212
+ Enabled: true
213
+
214
+ Lint/RedundantDirGlobSort:
215
+ Enabled: true
216
+
217
+ Lint/RedundantRegexpQuantifiers:
218
+ Enabled: true
219
+
220
+ Lint/RedundantTypeConversion:
221
+ Enabled: true
222
+
223
+ Lint/RefinementImportMethods:
224
+ Enabled: true
225
+
226
+ Lint/RequireRangeParentheses:
227
+ Enabled: true
228
+
229
+ Lint/RequireRelativeSelfPath:
230
+ Enabled: true
231
+
232
+ Lint/SharedMutableDefault:
233
+ Enabled: true
234
+
235
+ Lint/SuppressedExceptionInNumberConversion:
236
+ Enabled: true
237
+
238
+ Lint/SymbolConversion:
239
+ Enabled: true
240
+
241
+ Lint/ToEnumArguments:
242
+ Enabled: true
243
+
244
+ Lint/TripleQuotes:
245
+ Enabled: true
246
+
247
+ Lint/UnescapedBracketInRegexp:
248
+ Enabled: true
249
+
250
+ Lint/UnexpectedBlockArity:
251
+ Enabled: true
252
+
253
+ Lint/UnmodifiedReduceAccumulator:
254
+ Enabled: true
255
+
256
+ Lint/UselessConstantScoping:
257
+ Enabled: true
258
+
259
+ Lint/UselessDefaultValueArgument:
260
+ Enabled: true
261
+
262
+ Lint/UselessDefined:
263
+ Enabled: true
264
+
265
+ Lint/UselessNumericOperation:
266
+ Enabled: true
267
+
268
+ Lint/UselessOr:
269
+ Enabled: true
270
+
271
+ Lint/UselessRescue:
272
+ Enabled: true
273
+
274
+ Lint/UselessRuby2Keywords:
275
+ Enabled: true
276
+
277
+ Metrics/CollectionLiteralLength:
278
+ Enabled: true
279
+
280
+ Naming/BlockForwarding:
281
+ Enabled: true
282
+
283
+ Naming/PredicateMethod:
284
+ Enabled: true
285
+
286
+ Security/CompoundHash:
287
+ Enabled: true
288
+
289
+ Security/IoMethods:
290
+ Enabled: true
291
+
292
+ Style/AmbiguousEndlessMethodDefinition:
293
+ Enabled: true
294
+
295
+ Style/ArgumentsForwarding:
296
+ Enabled: true
297
+
298
+ Style/ArrayIntersect:
299
+ Enabled: true
300
+
301
+ Style/ArrayIntersectWithSingleElement:
302
+ Enabled: true
303
+
304
+ Style/BitwisePredicate:
305
+ Enabled: true
306
+
307
+ Style/CollectionCompact:
308
+ Enabled: true
309
+
310
+ Style/CollectionQuerying:
311
+ Enabled: true
312
+
313
+ Style/CombinableDefined:
314
+ Enabled: true
315
+
316
+ Style/ComparableBetween:
317
+ Enabled: true
318
+
319
+ Style/ComparableClamp:
320
+ Enabled: true
321
+
322
+ Style/ConcatArrayLiterals:
323
+ Enabled: true
324
+
325
+ Style/DataInheritance:
326
+ Enabled: true
327
+
328
+ Style/DigChain:
329
+ Enabled: true
330
+
331
+ Style/DirEmpty:
332
+ Enabled: true
333
+
334
+ Style/DocumentDynamicEvalDefinition:
335
+ Enabled: true
336
+
337
+ Style/EmptyHeredoc:
338
+ Enabled: true
339
+
340
+ Style/EmptyStringInsideInterpolation:
341
+ Enabled: true
342
+
343
+ Style/EndlessMethod:
344
+ Enabled: true
345
+
346
+ Style/EnvHome:
347
+ Enabled: true
348
+
349
+ Style/ExactRegexpMatch:
350
+ Enabled: true
351
+
352
+ Style/FetchEnvVar:
353
+ Enabled: true
354
+
355
+ Style/FileEmpty:
356
+ Enabled: true
357
+
358
+ Style/FileNull:
359
+ Enabled: true
360
+
361
+ Style/FileRead:
362
+ Enabled: true
363
+
364
+ Style/FileTouch:
365
+ Enabled: true
366
+
367
+ Style/FileWrite:
368
+ Enabled: true
369
+
370
+ Style/HashConversion:
371
+ Enabled: true
372
+
373
+ Style/HashExcept:
374
+ Enabled: true
375
+
376
+ Style/HashFetchChain:
377
+ Enabled: true
378
+
379
+ Style/HashSlice:
380
+ Enabled: true
381
+
382
+ Style/IfWithBooleanLiteralBranches:
383
+ Enabled: true
384
+
385
+ Style/InPatternThen:
386
+ Enabled: true
387
+
388
+ Style/ItAssignment:
389
+ Enabled: true
390
+
391
+ Style/ItBlockParameter:
392
+ Enabled: true
393
+
394
+ Style/KeywordArgumentsMerging:
395
+ Enabled: true
396
+
397
+ Style/MagicCommentFormat:
398
+ Enabled: true
399
+
400
+ Style/MapCompactWithConditionalBlock:
401
+ Enabled: true
402
+
403
+ Style/MapIntoArray:
404
+ Enabled: true
405
+
406
+ Style/MapToHash:
407
+ Enabled: true
408
+
409
+ Style/MapToSet:
410
+ Enabled: true
411
+
412
+ Style/MinMaxComparison:
413
+ Enabled: true
414
+
415
+ Style/MultilineInPatternThen:
416
+ Enabled: true
417
+
418
+ Style/NegatedIfElseCondition:
419
+ Enabled: true
420
+
421
+ Style/NestedFileDirname:
422
+ Enabled: true
423
+
424
+ Style/NilLambda:
425
+ Enabled: true
426
+
427
+ Style/NumberedParameters:
428
+ Enabled: true
429
+
430
+ Style/NumberedParametersLimit:
431
+ Enabled: true
432
+
433
+ Style/ObjectThen:
434
+ Enabled: true
435
+
436
+ Style/OpenStructUse:
437
+ Enabled: true
438
+
439
+ Style/OperatorMethodCall:
440
+ Enabled: true
441
+
442
+ Style/QuotedSymbols:
443
+ Enabled: true
444
+
445
+ Style/RedundantArgument:
446
+ Enabled: true
447
+
448
+ Style/RedundantArrayConstructor:
449
+ Enabled: true
450
+
451
+ Style/RedundantArrayFlatten:
452
+ Enabled: true
453
+
454
+ Style/RedundantConstantBase:
455
+ Enabled: true
456
+
457
+ Style/RedundantCurrentDirectoryInPath:
458
+ Enabled: true
459
+
460
+ Style/RedundantDoubleSplatHashBraces:
461
+ Enabled: true
462
+
463
+ Style/RedundantEach:
464
+ Enabled: true
465
+
466
+ Style/RedundantFilterChain:
467
+ Enabled: true
468
+
469
+ Style/RedundantFormat:
470
+ Enabled: true
471
+
472
+ Style/RedundantHeredocDelimiterQuotes:
473
+ Enabled: true
474
+
475
+ Style/RedundantInitialize:
476
+ Enabled: true
477
+
478
+ Style/RedundantInterpolationUnfreeze:
479
+ Enabled: true
480
+
481
+ Style/RedundantLineContinuation:
482
+ Enabled: true
483
+
484
+ Style/RedundantRegexpArgument:
485
+ Enabled: true
486
+
487
+ Style/RedundantRegexpConstructor:
488
+ Enabled: true
489
+
490
+ Style/RedundantSelfAssignmentBranch:
491
+ Enabled: true
492
+
493
+ Style/RedundantStringEscape:
494
+ Enabled: true
495
+
496
+ Style/ReturnNilInPredicateMethodDefinition:
497
+ Enabled: true
498
+
499
+ Style/SafeNavigationChainLength:
500
+ Enabled: true
501
+
502
+ Style/SelectByRegexp:
503
+ Enabled: true
504
+
505
+ Style/SendWithLiteralMethodName:
506
+ Enabled: true
507
+
508
+ Style/SingleLineDoEndBlock:
509
+ Enabled: true
510
+
511
+ Style/StringChars:
512
+ Enabled: true
513
+
514
+ Style/SuperArguments:
515
+ Enabled: true
516
+
517
+ Style/SuperWithArgsParentheses:
518
+ Enabled: true
519
+
520
+ Style/SwapValues:
521
+ Enabled: true
522
+
523
+ Style/YAMLFileRead:
524
+ Enabled: true
525
+
526
+ # Adjust RSpec metrics for test blocks
527
+ RSpec/ExampleLength:
528
+ Max: 35
529
+
530
+ RSpec/MultipleExpectations:
531
+ Max: 15
532
+
533
+ RSpec/NestedGroups:
534
+ Max: 6
data/Gemfile CHANGED
@@ -5,4 +5,5 @@ source 'https://rubygems.org'
5
5
  gemspec
6
6
 
7
7
  gem 'pry', '~> 0.14', group: :development
8
- gem 'pry-byebug', '~> 3.10', group: :development, platforms: %i[ruby mri]
8
+
9
+ gem 'pry-byebug', '~> 3.10', group: :development unless Gem.win_platform?
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- kreuzberg (4.0.0.pre.rc.6)
4
+ kreuzberg (4.0.0.rc1)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
@@ -25,7 +25,7 @@ GEM
25
25
  byebug (12.0.0)
26
26
  coderay (1.1.3)
27
27
  concurrent-ruby (1.3.5)
28
- connection_pool (3.0.2)
28
+ connection_pool (2.5.4)
29
29
  csv (3.3.5)
30
30
  diff-lcs (1.6.2)
31
31
  drb (2.2.3)
@@ -34,7 +34,7 @@ GEM
34
34
  fileutils (1.8.0)
35
35
  i18n (1.14.7)
36
36
  concurrent-ruby (~> 1.0)
37
- json (2.17.1)
37
+ json (2.16.0)
38
38
  language_server-protocol (3.17.0.5)
39
39
  lint_roller (1.1.0)
40
40
  listen (3.9.0)
@@ -60,12 +60,12 @@ GEM
60
60
  rake (13.3.1)
61
61
  rake-compiler (1.3.0)
62
62
  rake
63
- rake-compiler-dock (1.10.0)
63
+ rake-compiler-dock (1.9.1)
64
64
  rb-fsevent (0.11.2)
65
65
  rb-inotify (0.11.1)
66
66
  ffi (~> 1.0)
67
- rb_sys (0.9.119)
68
- rake-compiler-dock (= 1.10.0)
67
+ rb_sys (0.9.117)
68
+ rake-compiler-dock (= 1.9.1)
69
69
  rbs (3.9.5)
70
70
  logger
71
71
  regexp_parser (2.11.3)
@@ -131,20 +131,20 @@ GEM
131
131
  unicode-emoji (~> 4.1)
132
132
  unicode-emoji (4.1.0)
133
133
  uri (1.1.1)
134
- yard (0.9.38)
134
+ yard (0.9.37)
135
135
 
136
136
  PLATFORMS
137
- arm64-darwin-23
138
137
  arm64-darwin-24
138
+ ruby
139
139
 
140
140
  DEPENDENCIES
141
- bundler (~> 4.0)
141
+ bundler (~> 2.0)
142
142
  kreuzberg!
143
143
  pry (~> 0.14)
144
144
  pry-byebug (~> 3.10)
145
145
  rake (~> 13.0)
146
146
  rake-compiler (~> 1.2)
147
- rb_sys (~> 0.9.119)
147
+ rb_sys (~> 0.9)
148
148
  rbs (~> 3.0)
149
149
  rspec (~> 3.12)
150
150
  rubocop (~> 1.66)
@@ -154,4 +154,4 @@ DEPENDENCIES
154
154
  yard (~> 0.9)
155
155
 
156
156
  BUNDLED WITH
157
- 4.0.0
157
+ 2.7.2
data/README.md CHANGED
@@ -3,7 +3,7 @@
3
3
  [![RubyGems](https://img.shields.io/gem/v/kreuzberg)](https://rubygems.org/gems/kreuzberg)
4
4
  [![Crates.io](https://img.shields.io/crates/v/kreuzberg)](https://crates.io/crates/kreuzberg)
5
5
  [![PyPI](https://img.shields.io/pypi/v/kreuzberg)](https://pypi.org/project/kreuzberg/)
6
- [![npm](https://img.shields.io/npm/v/kreuzberg)](https://www.npmjs.com/package/kreuzberg)
6
+ [![npm](https://img.shields.io/npm/v/@goldziher/kreuzberg)](https://www.npmjs.com/package/@goldziher/kreuzberg)
7
7
  [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
8
8
  [![Documentation](https://img.shields.io/badge/docs-kreuzberg.dev-blue)](https://kreuzberg.dev)
9
9
 
@@ -12,7 +12,7 @@ High-performance document intelligence for Ruby, powered by Rust.
12
12
  Extract text, tables, images, and metadata from 30+ file formats including PDF, DOCX, PPTX, XLSX, images, and more.
13
13
 
14
14
  > **🚀 Version 4.0.0 Release Candidate**
15
- > This is a pre-release version. We invite you to test the library and [report any issues](https://github.com/kreuzberg-dev/kreuzberg/issues) you encounter.
15
+ > This is a pre-release version. We invite you to test the library and [report any issues](https://github.com/Goldziher/kreuzberg/issues) you encounter.
16
16
 
17
17
  ## Features
18
18
 
@@ -390,15 +390,12 @@ Kreuzberg's Rust core provides significant performance improvements:
390
390
 
391
391
  ```bash
392
392
  # Clone the repository
393
- git clone https://github.com/kreuzberg-dev/kreuzberg.git
393
+ git clone https://github.com/Goldziher/kreuzberg.git
394
394
  cd kreuzberg/packages/ruby
395
395
 
396
396
  # Install dependencies
397
397
  bundle install
398
398
 
399
- # Set up vendor symlink for local development (required for building)
400
- ln -sfn ../../crates/kreuzberg vendor/kreuzberg
401
-
402
399
  # Build the Rust extension
403
400
  bundle exec rake compile
404
401
 
@@ -409,8 +406,6 @@ bundle exec rspec
409
406
  bundle exec rubocop
410
407
  ```
411
408
 
412
- **Note**: The Ruby bindings use a vendored copy of the core `kreuzberg` Rust crate. For local development, create a symlink at `vendor/kreuzberg` pointing to `../../crates/kreuzberg`. In CI and gem packaging, the actual vendored files are copied to this location.
413
-
414
409
  ## License
415
410
 
416
411
  MIT License. See [LICENSE](../../LICENSE) for details.
@@ -422,5 +417,5 @@ Contributions are welcome! Please see [CONTRIBUTING.md](../../CONTRIBUTING.md) f
422
417
  ## Links
423
418
 
424
419
  - **Documentation**: https://docs.kreuzberg.dev
425
- - **GitHub**: https://github.com/kreuzberg-dev/kreuzberg
426
- - **Issues**: https://github.com/kreuzberg-dev/kreuzberg/issues
420
+ - **GitHub**: https://github.com/Goldziher/kreuzberg
421
+ - **Issues**: https://github.com/Goldziher/kreuzberg/issues
data/examples/async_patterns.rb CHANGED
@@ -236,7 +236,6 @@ end
236
236
  # Pattern 15: Custom Ruby OCR Backend Plugin
237
237
  # ============================================================================
238
238
 
239
- # Example OCR backend implementation for custom processing.
240
239
  class CustomOcrBackend
241
240
  def process_image(image_bytes, language)
242
241
  # In a real implementation, you would:
data/ext/kreuzberg_rb/extconf.rb CHANGED
@@ -4,15 +4,6 @@ require 'mkmf'
4
4
  require 'rb_sys/mkmf'
5
5
  require 'rbconfig'
6
6
 
7
- # Set shorter build directory for Windows to avoid MAX_PATH issues
8
- if Gem.win_platform?
9
- # Use a much shorter path to avoid Windows MAX_PATH (260 char) limit
10
- short_target_dir = 'D:/kz-build'
11
- ENV['CARGO_TARGET_DIR'] = short_target_dir
12
- ENV['OUT_DIR'] = short_target_dir
13
- puts "Windows detected: Using short build path #{short_target_dir}"
14
- end
15
-
16
7
  if /mswin|mingw/.match?(RbConfig::CONFIG['host_os'])
17
8
  devkit = ENV.fetch('RI_DEVKIT', nil)
18
9
  prefix = ENV['MSYSTEM_PREFIX'] || '/ucrt64'
@@ -40,6 +31,5 @@ native_dir = 'native'
40
31
 
41
32
  create_rust_makefile('kreuzberg_rb') do |config|
42
33
  config.profile = default_profile.to_sym
43
- # Build from the actual Rust crate location (native/) so Cargo.toml is resolvable in packaged gems
44
34
  config.ext_dir = native_dir
45
35
  end