tahweel 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/bin/tahweel-ui ADDED
@@ -0,0 +1,300 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ # Add the ../lib directory to the load path so we can require 'tahweel'
5
+ $LOAD_PATH.unshift(File.expand_path("../lib", __dir__))
6
+
7
+ require "matrix"
8
+ require "glimmer-dsl-libui"
9
+ require "launchy"
10
+ require "tahweel"
11
+
12
+ Tahweel::Authorizer.authorize unless defined?(Ocran)
13
+
14
+ class TahweelApp # rubocop:disable Metrics/ClassLength,Style/Documentation
15
+ include Glimmer::LibUI::Application
16
+
17
+ TRANSLATIONS = {
18
+ # https://www.perplexity.ai/search/add-direction-unicode-controls-Z5haIOFEQZeCbG5B8Wt2dg#3
19
+ ar: {
20
+ window_title: "تحويل",
21
+ title_label: "‫تحويل: حوّل الملفات من صيغة ‪PDF‬ إلى ‪TXT‬ و ‪DOCX‬ ↓‬",
22
+ note_label: "‫ملاحظة: يدعم تحويل الملفات بصيغة ‪PDF‬ أو صورة (‪JPG‬ و ‪JPEG‬ و ‪PNG‬) فقط.‬",
23
+ file_btn: "تحويل ملف واحد",
24
+ folder_btn: "تحويل مجلد كامل",
25
+ language_btn: "English",
26
+ global_progress: "التقدم العام:",
27
+ file_progress: "تقدم الملف الحالي:",
28
+ status_done: "انتهى التحويل.",
29
+ msg_success_title: "اكتمل التحويل",
30
+ msg_error_title: "خطأ",
31
+ msg_no_files: "لم نعثر على ملفات لتحويلها.",
32
+ msg_bad_extension: "صيغة الملف غير مدعومة.",
33
+ stage_preparing: "جارٍ التحضير...",
34
+ stage_splitting: "جارٍ تقسيم الملف...",
35
+ stage_ocr: "جارٍ استخراج النصوص...",
36
+ stage_done: "انتهى"
37
+ },
38
+ en: {
39
+ window_title: "Tahweel",
40
+ title_label: "Tahweel: Convert PDF files to TXT and DOCX ↓",
41
+ note_label: "Note: Tahweel supports PDF or image files (JPG, JPEG, and PNG) only.",
42
+ file_btn: "Convert a Single File",
43
+ folder_btn: "Convert a Folder",
44
+ language_btn: "العربية",
45
+ global_progress: "Progress:",
46
+ file_progress: "Current File:",
47
+ status_done: "Conversion complete.",
48
+ msg_success_title: "Conversion Complete",
49
+ msg_error_title: "Error",
50
+ msg_no_files: "No files found to convert.",
51
+ msg_bad_extension: "Unsupported file format.",
52
+ stage_preparing: "Preparing...",
53
+ stage_splitting: "Splitting file...",
54
+ stage_ocr: "Extracting text...",
55
+ stage_done: "Done"
56
+ }
57
+ }.freeze
58
+
59
# Prepares the instance state used by the UI before handing over to the
# parent initializer: the two lists collect the widgets whose visibility
# toggle_language flips, and the interface starts in Arabic.
#
# @param args [Array] forwarded untouched to the parent initializer.
def initialize(*args)
  @ltr_components = []
  @rtl_components = []
  @lang = :ar

  super
end
66
+
67
# Declares the main window. From top to bottom: header label, conversion
# buttons, note label, progress group, separator and language button.
body do
  @main_window = window(t(:window_title)) do
    margined true

    vertical_box do
      @header_label = right_aligned_label(t(:title_label))
      convert_buttons
      @note_label = right_aligned_label(t(:note_label))

      # The progress group is built up front but stays hidden until convert reveals it.
      @progress_section = progress_section.tap { |section| section.visible = false }

      horizontal_separator do
        stretchy false
      end

      @language_btn = language_button
    end
  end
end
85
+
86
+ private
87
+
88
# Builds the row containing the language toggle button, with a blank
# stretchy label on each side of it.
#
# The label before the button starts hidden and is registered in
# @rtl_components; the label after it starts visible and is registered in
# @ltr_components. toggle_language flips the visibility of both.
#
# @return the button control (not the enclosing box).
def language_button # rubocop:disable Metrics/MethodLength
  toggle_btn = nil

  horizontal_box do
    stretchy false

    leading_spacer = label("") { stretchy true }
    leading_spacer.visible = false
    @rtl_components.push(leading_spacer)

    toggle_btn = button(t(:language_btn)) do
      stretchy false
      on_clicked { toggle_language }
    end

    trailing_spacer = label("") { stretchy true }
    @ltr_components.push(trailing_spacer)
  end

  toggle_btn
end
109
+
110
# Builds the row of conversion buttons.
#
# Two "convert folder" buttons are created, one on each side of the
# "convert file" button: the Arabic-captioned one starts visible and the
# English-captioned one starts hidden. Both are registered in
# @rtl_components so toggle_language swaps which of the two is shown.
def convert_buttons
  horizontal_box do
    @ar_convert_folder_btn = button(t(:folder_btn, lang: :ar)) do
      on_clicked { on_folder_click }
    end

    @convert_file_btn = button(t(:file_btn)) do
      on_clicked { on_file_click }
    end

    @en_convert_folder_btn = button(t(:folder_btn, lang: :en)) do
      on_clicked { on_folder_click }
    end
    @en_convert_folder_btn.visible = false

    @rtl_components.push(@ar_convert_folder_btn, @en_convert_folder_btn)
  end
end
122
+
123
# Builds the progress group: a caption label and a progress bar for the
# overall run, followed by a caption label and a progress bar for the
# file currently being processed.
#
# @return the group control, so the caller can show or hide it as a whole.
def progress_section
  group do
    stretchy false

    vertical_box do
      @global_progress_label = right_aligned_label(t(:global_progress))
      @global_progress = progress_bar do
        stretchy true
      end

      @file_progress_label = right_aligned_label(t(:file_progress))
      @file_progress = progress_bar do
        stretchy true
      end
    end
  end
end
136
+
137
# Click handler for the "convert a single file" button.
# Does nothing when no file was chosen or when valid_file? rejects it;
# otherwise converts that one file, using its directory as the folder.
def on_file_click
  chosen = open_file
  return unless chosen && valid_file?(chosen)

  convert(File.dirname(chosen), [chosen])
end
141
+
142
# Checks the file extension (case-insensitively) against the supported
# list and shows an error box when it is not supported.
#
# @param file [String] path of the chosen file.
# @return [Boolean] true for .pdf, .jpg, .jpeg and .png; false otherwise.
def valid_file?(file)
  extension = File.extname(file).downcase
  supported = %w[.pdf .jpg .jpeg .png].include?(extension)

  msg_box_error(t(:msg_error_title), t(:msg_bad_extension)) unless supported
  supported
end
148
+
149
# Click handler for both "convert a folder" buttons.
# Asks for a folder, gathers its files through collect_files and starts
# the conversion only when at least one file came back.
def on_folder_click
  chosen_dir = open_folder
  files = collect_files(chosen_dir)
  return unless files.any?

  convert(chosen_dir, files)
end
154
+
155
# Collects the convertible files found under +folder+.
#
# A nil folder returns an empty list without any message, mirroring
# on_file_click, which silently ignores a nil file (presumably the picker
# was dismissed — confirm against the dialog API). An error box is shown
# only when a folder was actually chosen and the collector found nothing.
#
# @param folder [String, nil] folder picked by the user, or nil.
# @return [Array] the collected paths, or an empty array.
def collect_files(folder)
  return [] unless folder

  paths = Tahweel::CLI::FileCollector.collect(folder)
  return paths if paths.any?

  msg_box_error(t(:msg_error_title), t(:msg_no_files))
  []
end
162
+
163
# Converts +paths+ one after another on a background thread.
#
# The window controls are disabled and the progress group is revealed
# before the thread starts. finish_conversion re-enables the controls on
# success. If anything raises while processing, the controls are
# re-enabled and the error message is shown in an error box; without this
# the thread would die and leave every button disabled.
#
# @param folder [String] folder handed to finish_conversion afterwards.
# @param paths [Array<String>] files to process, in order.
# @return [Thread] the worker thread.
def convert(folder, paths)
  disable_window
  @progress_section.visible = true

  Thread.new do
    paths.each_with_index { |path, index| process_path(path, index, paths.size) }
    finish_conversion(folder, paths.size)
  rescue StandardError => e
    # UI updates must be queued onto the main loop, as elsewhere in this class.
    Glimmer::LibUI.queue_main do
      enable_window
      msg_box_error(t(:msg_error_title), e.message)
    end
  end
end
172
+
173
# Disables the three conversion buttons and the language button.
def disable_window
  [@convert_file_btn, @ar_convert_folder_btn, @en_convert_folder_btn, @language_btn].each do |control|
    control.enabled = false
  end
end
179
+
180
# Processes one file: resets the progress widgets for it, then hands the
# file to the file processor, relaying each progress value the processor
# yields to update_file_progress.
#
# @param path [String] file to process.
# @param index [Integer] zero-based position of the file in the run.
# @param total_files [Integer] number of files in the run.
def process_path(path, index, total_files)
  reset_file_progress(index, total_files)

  processing_options = options(path)
  Tahweel::CLI::FileProcessor.process(path, processing_options) do |progress|
    update_file_progress(progress)
  end
end
185
+
186
# Queues a UI update that prepares the progress widgets for the file at
# +index+: the global bar shows index/total as a percentage, the file bar
# returns to 0 and the file caption shows the "preparing" stage.
#
# The Unicode directional controls (U+202B RLE, U+202A LRE, U+202C PDF)
# are only added when the interface language is Arabic. They force the
# "n/m" counter to read left-to-right inside right-to-left text; wrapping
# English text in a right-to-left embedding would misplace the counter
# and parentheses. The English form matches finish_conversion's format.
#
# @param index [Integer] zero-based position of the file about to start.
# @param total_files [Integer] number of files in the run.
def reset_file_progress(index, total_files)
  Glimmer::LibUI.queue_main do
    counter = "#{index}/#{total_files}"

    @global_progress.value = ((index.to_f / total_files) * 100).to_i
    @global_progress_label.text =
      if @lang == :ar
        "\u202B#{t(:global_progress)} (\u202A#{counter}\u202C)\u202C"
      else
        "#{t(:global_progress)} (#{counter})"
      end
    @file_progress.value = 0
    @file_progress_label.text = "#{t(:file_progress)} #{t(:stage_preparing)}"
  end
end
194
+
195
# Builds the options hash handed to the file processor for +path+.
# The base input path is +path+ itself when it is a directory, otherwise
# the directory containing it.
#
# @param path [String] file (or directory) about to be processed.
# @return [Hash] processing options.
def options(path)
  settings = {
    dpi: 150,
    processor: :google_drive,
    ocr_concurrency: Tahweel::Converter::DEFAULT_CONCURRENCY,
    file_concurrency: 1,
    formats: %i[txt docx]
  }

  settings[:base_input_path] =
    if File.directory?(path)
      path
    else
      File.dirname(path)
    end

  settings
end
205
+
206
# Queues a UI update reflecting progress on the current file.
#
# @param progress [Hash] reads :percentage (converted with to_i) for the
#   bar and :stage (translated through stage_text) for the caption.
def update_file_progress(progress)
  Glimmer::LibUI.queue_main do
    percent = progress[:percentage].to_i
    @file_progress.value = percent

    caption = t(:file_progress)
    stage_name = stage_text(progress[:stage])
    @file_progress_label.text = "#{caption} (#{stage_name})"
  end
end
212
+
213
# Translates a processing stage into its caption in the current language.
#
# @param stage [Symbol] :splitting or :ocr.
# @return [String, nil] the translated caption, or nil for any other stage.
def stage_text(stage)
  translation_key = { splitting: :stage_splitting, ocr: :stage_ocr }[stage]
  return unless translation_key

  t(translation_key)
end
219
+
220
# Wraps up a conversion run.
#
# Queues a UI update that fills both progress bars, sets the final
# captions, re-enables the controls and shows the success message box.
# Then opens +folder+ through Launchy; that call sits outside the queued
# block, so it runs on the calling thread.
#
# @param folder [String] folder to open once the run is over.
# @param total_files [Integer] number of files that were processed.
def finish_conversion(folder, total_files)
  Glimmer::LibUI.queue_main do
    [@global_progress, @file_progress].each { |bar| bar.value = 100 }

    counter = "#{total_files}/#{total_files}"
    @global_progress_label.text = "#{t(:global_progress)} (#{counter})"
    @file_progress_label.text = "#{t(:file_progress)} (#{t(:stage_done)})"

    enable_window
    msg_box(t(:msg_success_title), convert_finished_message(total_files))
  end

  Launchy.open(folder)
end
233
+
234
# Re-enables the three conversion buttons and the language button.
def enable_window
  [@convert_file_btn, @ar_convert_folder_btn, @en_convert_folder_btn, @language_btn].each do |control|
    control.enabled = true
  end
end
240
+
241
# Builds the success message for the current interface language.
#
# @param files_count [Integer] number of converted files.
# @return [String] the English sentence when the language is :en,
#   otherwise the Arabic sentence from arabic_convert_finished_message.
def convert_finished_message(files_count)
  return arabic_convert_finished_message(files_count) unless @lang == :en

  "Finished converting #{files_count} file(s) successfully."
end
248
+
249
# Builds the Arabic success sentence, choosing the noun form from the count.
#
# Exactly 1 and exactly 2 have dedicated sentences. For every other count
# the form depends on the count modulo 100: 0..2 takes the singular form,
# 3..10 the plural form, and 11..99 the accusative singular form.
#
# @param files_count [Integer] number of converted files.
# @return [String] the Arabic sentence.
def arabic_convert_finished_message(files_count)
  case files_count
  when 1 then return "انتهى تحويل ملف واحد بنجاح."
  when 2 then return "انتهى تحويل ملفين بنجاح."
  end

  remainder = files_count % 100

  noun =
    if remainder <= 2
      "ملف"
    elsif remainder <= 10
      "ملفات"
    else
      "ملفًا"
    end

  "انتهى تحويل #{files_count} #{noun} بنجاح."
end
263
+
264
# Switches the interface between Arabic and English.
#
# Flips @lang, re-reads every caption from the translations (which also
# resets the two progress captions to their bare form), and inverts the
# visibility of every widget registered in @rtl_components and
# @ltr_components.
def toggle_language # rubocop:disable Metrics/AbcSize
  @lang =
    if @lang == :ar
      :en
    else
      :ar
    end

  @main_window.title = t(:window_title)

  [
    [@header_label, :title_label],
    [@note_label, :note_label],
    [@convert_file_btn, :file_btn],
    [@language_btn, :language_btn],
    [@global_progress_label, :global_progress],
    [@file_progress_label, :file_progress]
  ].each { |widget, key| widget.text = t(key) }

  (@rtl_components + @ltr_components).each do |widget|
    widget.visible = !widget.visible
  end
end
279
+
280
# Looks up a UI string in TRANSLATIONS.
#
# @param key [Symbol] translation key.
# @param lang [Symbol, nil] language to use; falls back to @lang when nil.
# @return [String, nil] the translated string, or nil when the key is absent.
def t(key, lang: nil)
  language = lang || @lang
  TRANSLATIONS[language][key]
end
281
+
282
# Builds a row holding a text label with a blank stretchy label on each side.
#
# The blank label before the text starts visible and is registered in
# @rtl_components; the one after it starts hidden and is registered in
# @ltr_components. toggle_language flips the visibility of both.
#
# @param text [String] caption for the label.
# @return the text label control (not the enclosing box), so callers can
#   update its text later.
def right_aligned_label(text)
  text_label = nil

  horizontal_box do
    leading_spacer = label("") { stretchy true }
    @rtl_components.push(leading_spacer)

    text_label = label(text) { stretchy false }

    trailing_spacer = label("") { stretchy true }
    trailing_spacer.visible = false
    @ltr_components.push(trailing_spacer)
  end

  text_label
end
298
+ end
299
+
300
+ TahweelApp.launch unless defined?(Ocran)
@@ -1,12 +1,13 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "google/apis/drive_v3"
4
- require "googleauth"
5
- require "googleauth/stores/file_token_store"
3
+ require "fileutils"
6
4
  require "socket"
7
5
  require "uri"
6
+
7
+ require "googleauth"
8
+ require "googleauth/stores/file_token_store"
9
+ require "google/apis/drive_v3"
8
10
  require "launchy"
9
- require "fileutils"
10
11
  require "xdg"
11
12
 
12
13
  module Tahweel
@@ -18,8 +19,8 @@ module Tahweel
18
19
  # 3. Initiating the OAuth 2.0 flow via a local web server if needed.
19
20
  # 4. Exchanging the authorization code for credentials and persisting them.
20
21
  class Authorizer
21
- CLIENT_ID = "512416833080-hptj9s5r92pjmdgigrcugbp40ng9isvj.apps.googleusercontent.com"
22
- CLIENT_SECRET = "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"
22
+ CLIENT_ID = "512416833080-808aqp20iith31t9rgtdmsgc53jp0sc2.apps.googleusercontent.com"
23
+ CLIENT_SECRET = "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"
23
24
 
24
25
  PORT = 3027
25
26
  REDIRECT_URI = "http://localhost:#{PORT}/".freeze
@@ -104,7 +105,16 @@ module Tahweel
104
105
  end
105
106
 
106
107
  # Opens the system default browser to the Google Authorization URL.
107
- def open_browser_for_auth = Launchy.open(@authorizer.get_authorization_url(base_url: REDIRECT_URI))
108
+ def open_browser_for_auth
109
+ url = @authorizer.get_authorization_url(base_url: REDIRECT_URI)
110
+
111
+ if Gem.win_platform?
112
+ # https://github.com/copiousfreetime/launchy/issues/167
113
+ system("start \"\" \"#{url}\"")
114
+ else
115
+ Launchy.open(url)
116
+ end
117
+ end
108
118
 
109
119
  # Listens on the local server for the OAuth callback request.
110
120
  # Handles multiple incoming requests to filter out noise (like favicon.ico).
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "fileutils"
3
4
  require "pathname"
4
5
 
5
6
  module Tahweel
@@ -17,7 +18,7 @@ module Tahweel
17
18
  # @option options [String] :output The directory to save output files (defaults to current directory).
18
19
  # @option options [Integer] :dpi DPI for PDF conversion (defaults to 150).
19
20
  # @option options [Symbol] :processor The OCR processor to use (e.g., :google_drive).
20
- # @option options [Integer] :page_concurrency Max concurrent operations.
21
+ # @option options [Integer] :ocr_concurrency Max concurrent OCR operations.
21
22
  # @option options [Array<Symbol>] :formats Output formats (e.g., [:txt, :docx]).
22
23
  # @option options [String] :page_separator Separator string for TXT output.
23
24
  # @option options [String] :base_input_path The base path used to determine relative output structure.
@@ -66,8 +67,11 @@ module Tahweel
66
67
 
67
68
  private
68
69
 
70
+ # Creates the output directory if it doesn't exist.
69
71
  def ensure_output_directory_exists = FileUtils.mkdir_p(output_directory)
70
72
 
73
+ # Checks if all requested output formats already exist.
74
+ # @return [Boolean] True if all output files exist.
71
75
  def all_outputs_exist?
72
76
  @options[:formats].all? do |format|
73
77
  extension = Tahweel::Writer.new(format: format).extension
@@ -75,22 +79,29 @@ module Tahweel
75
79
  end
76
80
  end
77
81
 
82
+ # Checks if the input file is a PDF.
83
+ # @return [Boolean]
78
84
  def pdf? = File.extname(@file_path).downcase == ".pdf"
79
85
 
86
+ # Handles PDF processing: splitting, OCR, and saving.
87
+ # @param &block [Proc] Progress callback.
80
88
  def process_pdf(&)
81
89
  texts = Tahweel.convert(
82
90
  @file_path,
83
91
  dpi: @options[:dpi],
84
92
  processor: @options[:processor],
85
- concurrency: @options.fetch(:page_concurrency, Tahweel::Converter::DEFAULT_CONCURRENCY),
93
+ concurrency: @options.fetch(:ocr_concurrency, Tahweel::Converter::DEFAULT_CONCURRENCY),
86
94
  &
87
95
  )
88
96
 
89
97
  write_output(texts)
90
98
  end
91
99
 
100
+ # Handles single image processing.
92
101
  def process_image = write_output([Tahweel.extract(@file_path, processor: @options[:processor])])
93
102
 
103
+ # Writes the extracted text to all configured formats.
104
+ # @param texts [Array<String>] The list of extracted texts (per page).
94
105
  def write_output(texts)
95
106
  Tahweel::Writer.write(
96
107
  texts,
@@ -100,6 +111,7 @@ module Tahweel
100
111
  )
101
112
  end
102
113
 
114
+ # @return [String] The full path for output files without extension.
103
115
  def base_output_path = File.join(output_directory, File.basename(@file_path, ".*"))
104
116
 
105
117
  # Determines the output directory.
@@ -3,6 +3,13 @@
3
3
  require "etc"
4
4
  require "optparse"
5
5
 
6
+ require_relative "../version"
7
+ require_relative "../converter"
8
+ require_relative "../ocr"
9
+ require_relative "../writer"
10
+ require_relative "../writers/txt"
11
+ require_relative "file_collector"
12
+
6
13
  module Tahweel
7
14
  module CLI
8
15
  # Parses command-line arguments for the Tahweel CLI.
@@ -26,11 +33,13 @@ module Tahweel
26
33
  options
27
34
  end
28
35
 
36
+ # Returns the default configuration options.
37
+ # @return [Hash] Default options.
29
38
  def self.default_options
30
39
  {
31
40
  dpi: 150,
32
41
  processor: :google_drive,
33
- page_concurrency: Tahweel::Converter::DEFAULT_CONCURRENCY,
42
+ ocr_concurrency: Tahweel::Converter::DEFAULT_CONCURRENCY,
34
43
  file_concurrency: (Etc.nprocessors - 2).clamp(2..),
35
44
  output: nil,
36
45
  formats: %i[txt docx],
@@ -38,6 +47,10 @@ module Tahweel
38
47
  }
39
48
  end
40
49
 
50
+ # Configures the OptionParser instance.
51
+ #
52
+ # @param opts [OptionParser] The parser instance.
53
+ # @param options [Hash] The options hash to populate.
41
54
  def self.configure_parser(opts, options) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
42
55
  opts.program_name = "tahweel"
43
56
  opts.version = Tahweel::VERSION
@@ -69,17 +82,17 @@ module Tahweel
69
82
  end
70
83
 
71
84
  opts.on(
72
- "-P", "--page-concurrency PAGE_CONCURRENCY", POSITIVE_INTEGER,
73
- "Max concurrent OCR operations (default: #{options[:page_concurrency]})"
85
+ "-F", "--file-concurrency FILE_CONCURRENCY", POSITIVE_INTEGER,
86
+ "Max concurrent files to process (default: CPUs - 2 = #{options[:file_concurrency]})"
74
87
  ) do |value|
75
- options[:page_concurrency] = value
88
+ options[:file_concurrency] = value
76
89
  end
77
90
 
78
91
  opts.on(
79
- "-F", "--file-concurrency FILE_CONCURRENCY", POSITIVE_INTEGER,
80
- "Max concurrent files to process (default: CPUs - 2 = #{options[:file_concurrency]})"
92
+ "-O", "--ocr-concurrency OCR_CONCURRENCY", POSITIVE_INTEGER,
93
+ "Max concurrent OCR operations (default: #{options[:ocr_concurrency]})"
81
94
  ) do |value|
82
- options[:file_concurrency] = value
95
+ options[:ocr_concurrency] = value
83
96
  end
84
97
 
85
98
  opts.on(
@@ -104,6 +117,10 @@ module Tahweel
104
117
  end
105
118
  end
106
119
 
120
+ # Validates that arguments were provided.
121
+ #
122
+ # @param args [Array<String>] The remaining arguments after parsing.
123
+ # @param parser [OptionParser] The parser instance for printing help.
107
124
  def self.validate_args!(args, parser)
108
125
  return unless args.empty?
109
126
 
@@ -102,6 +102,7 @@ module Tahweel
102
102
 
103
103
  private
104
104
 
105
+ # Starts a background thread to refresh the display periodically.
105
106
  def start_ticker
106
107
  @ticker_thread = Thread.new do
107
108
  while @running
@@ -111,6 +112,7 @@ module Tahweel
111
112
  end
112
113
  end
113
114
 
115
+ # Renders the progress dashboard to stdout.
114
116
  def render # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
115
117
  # Move cursor up to the start of our block
116
118
  $stdout.print "\e[#{@concurrency + 1}A"
@@ -138,6 +140,11 @@ module Tahweel
138
140
  $stdout.flush
139
141
  end
140
142
 
143
+ # Truncates a file path to fit within the display.
144
+ #
145
+ # @param path [String] The file path.
146
+ # @param max_length [Integer] Maximum allowed length.
147
+ # @return [String] The truncated path.
141
148
  def truncate_path(path, max_length)
142
149
  return path.ljust(max_length) if path.length <= max_length
143
150
 
@@ -1,8 +1,9 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "fileutils"
4
+
3
5
  require_relative "pdf_splitter"
4
6
  require_relative "ocr"
5
- require "fileutils"
6
7
 
7
8
  module Tahweel
8
9
  # Orchestrates the full conversion process:
@@ -60,10 +61,10 @@ module Tahweel
60
61
  # }
61
62
  # @return [Array<String>] An array containing the text of each page.
62
63
  def convert(&)
63
- image_paths, temp_dir = PdfSplitter.split(@pdf_path, dpi: @dpi, &).values_at(:image_paths, :folder_path)
64
+ images_paths, temp_dir = PdfSplitter.split(@pdf_path, dpi: @dpi, &).values_at(:images_paths, :folder_path)
64
65
 
65
66
  begin
66
- process_images(image_paths, Ocr.new(processor: @processor_type), &)
67
+ process_images(images_paths, Ocr.new(processor: @processor_type), &)
67
68
  ensure
68
69
  FileUtils.rm_rf(temp_dir)
69
70
  end
@@ -71,31 +72,55 @@ module Tahweel
71
72
 
72
73
  private
73
74
 
74
- def process_images(image_paths, ocr_engine, &)
75
- texts = Array.new(image_paths.size)
75
+ # Processes the list of images concurrently using the specified OCR engine.
76
+ #
77
+ # @param images_paths [Array<String>] List of paths to the image files.
78
+ # @param ocr_engine [Tahweel::Ocr] The initialized OCR engine instance.
79
+ # @param &block [Proc] Block to yield progress updates.
80
+ # @return [Array<String>] The text extracted from the images.
81
+ def process_images(images_paths, ocr_engine, &)
82
+ texts = Array.new(images_paths.size)
76
83
  mutex = Mutex.new
77
84
  processed_count = 0
78
85
 
79
- run_workers(build_queue(image_paths), ocr_engine, texts, mutex) do
86
+ run_workers(build_queue(images_paths), ocr_engine, texts, mutex) do
80
87
  processed_count += 1
81
- report_progress(processed_count, image_paths.size, &)
88
+ report_progress(processed_count, images_paths.size, &)
82
89
  end
83
90
 
84
91
  texts
85
92
  end
86
93
 
87
- def build_queue(image_paths)
94
+ # Builds a queue of image paths and their indices.
95
+ #
96
+ # @param images_paths [Array<String>] List of image paths.
97
+ # @return [Queue] A queue containing [path, index] tuples.
98
+ def build_queue(images_paths)
88
99
  queue = Queue.new
89
- image_paths.each_with_index { |path, index| queue << [path, index] }
100
+ images_paths.each_with_index { |path, index| queue << [path, index] }
90
101
  queue
91
102
  end
92
103
 
104
+ # Spawns worker threads to process items from the queue.
105
+ #
106
+ # @param queue [Queue] The queue of images to process.
107
+ # @param ocr_engine [Tahweel::Ocr] The OCR engine.
108
+ # @param texts [Array<String>] Shared array to store results.
109
+ # @param mutex [Mutex] Mutex for thread-safe updates.
110
+ # @param &block [Proc] Block to yield progress updates.
93
111
  def run_workers(queue, ocr_engine, texts, mutex, &)
94
112
  Array.new(@concurrency) do
95
113
  Thread.new { process_queue_items(queue, ocr_engine, texts, mutex, &) }
96
114
  end.each(&:join)
97
115
  end
98
116
 
117
+ # Processing loop for a single worker thread.
118
+ #
119
+ # @param queue [Queue] The shared queue.
120
+ # @param ocr_engine [Tahweel::Ocr] The OCR engine.
121
+ # @param texts [Array<String>] Shared result array.
122
+ # @param mutex [Mutex] Synchronization primitive.
123
+ # @param &block [Proc] Block to yield progress updates.
99
124
  def process_queue_items(queue, ocr_engine, texts, mutex, &)
100
125
  loop do
101
126
  begin
@@ -109,6 +134,13 @@ module Tahweel
109
134
  end
110
135
  end
111
136
 
137
+ # Thread-safe saving of OCR results.
138
+ #
139
+ # @param texts [Array<String>] The results array.
140
+ # @param index [Integer] Index of the current page.
141
+ # @param text [String] Extracted text.
142
+ # @param mutex [Mutex] Synchronization primitive.
143
+ # @yield Executes the progress reporting block within the lock.
112
144
  def save_result(texts, index, text, mutex)
113
145
  mutex.synchronize do
114
146
  texts[index] = text
@@ -116,6 +148,11 @@ module Tahweel
116
148
  end
117
149
  end
118
150
 
151
+ # Reports progress to the optional block.
152
+ #
153
+ # @param processed [Integer] Number of pages processed.
154
+ # @param total [Integer] Total number of pages.
155
+ # @yield [Hash] Progress information.
119
156
  def report_progress(processed, total)
120
157
  return unless block_given?
121
158