tahweel 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 32c33604a9634bbcf0c683c1b5c0b76bdd2339e4beb0b6107ff94449e6f38453
4
- data.tar.gz: e02b8a16c1b75c903a7e78f4cbb55795164f90e91ae1190113c156ca7fda5524
3
+ metadata.gz: cfa4c4ffbb0b1229794addc7ae400b8af9fda793e7c17b403d4aa60bd92fa7f6
4
+ data.tar.gz: 2d236076739f2ae1b892669487b6ac839b1a5420c1928621787e6387e7ba50ab
5
5
  SHA512:
6
- metadata.gz: 979019e30d2698a5a99930f694392c2ed43efb9e52f2c88534e5647490c7bc157360b6f2cd1fa745758c1f8b3ad019dce1fc25e616a9a77408c67555cb025260
7
- data.tar.gz: 6b0cd84acd43c4935214eb444273eab9b0f1f8b4717873d872da2c576d3b5f0cd1818416540a84857c77db7a25492435cd513bcffee3455e9f7a94a7c6f96adf
6
+ metadata.gz: de06384d492cd26925dee76392119d0cd7c05d279e1aaafa97d055091558513847b051f5c65bc505a5213623a36004f734fbe0794b414ebc28c6982639841960
7
+ data.tar.gz: 31e2d05fbaf89c4f09ef3859243052ffd00f95ffa277368826e9ebed11539b401d8b8487920c799b6bc2bf5d52ab36cfa4ff4771f9661a38a839d5353dfed2bf
@@ -1,5 +1,6 @@
1
1
  {
2
2
  "workbench.iconTheme": "vscode-icons",
3
+ "editor.rulers": [80, 120, 140],
3
4
  "files.insertFinalNewline": true,
4
5
  "[ruby]": {
5
6
  "editor.defaultFormatter": "Shopify.ruby-lsp",
data/CHANGELOG.md CHANGED
@@ -1,5 +1,12 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [0.1.2] - 2026-01-03
4
+
5
+ ### Changed
6
+
7
+ - Normalize line endings in DOCX writer to proper OOXML line breaks
8
+ - Add Windows EXE build workflow
9
+
3
10
  ## [0.1.1] - 2025-12-03
4
11
 
5
12
  ### Changed
data/assets/logo.png ADDED
Binary file
Binary file
data/bin/tahweel CHANGED
@@ -11,10 +11,15 @@ require "tahweel/cli/progress_renderer"
11
11
  begin
12
12
  soft, hard = Process.getrlimit(:NOFILE)
13
13
  Process.setrlimit(:NOFILE, [4096, hard].min) if soft < 4096
14
- rescue StandardError
15
- puts "\e[31mTahweel failed to increase the soft limit of file descriptors to 4096."
16
- puts "If you faced connection errors or the CLI froze, try running `ulimit -n 4096` in your terminal."
17
- puts "If you still face issues, please report them at https://github.com/ieasybooks/tahweel.rb/issues.\e[0m"
14
+ rescue Exception # rubocop:disable Lint/RescueException
15
+ if Gem.win_platform?
16
+ puts "\e[33mWarning: Could not adjust file descriptor limit on Windows. Proceeding with default limits."
17
+ puts "If you faced connection errors or the CLI froze, please report at https://github.com/ieasybooks/tahweel.rb/issues.\e[0m"
18
+ else
19
+ puts "\e[33mWarning: Tahweel failed to increase the soft limit of file descriptors to 4096."
20
+ puts "If you faced connection errors or the CLI froze, try running `ulimit -n 4096` in your terminal."
21
+ puts "If you still face issues, please report them at https://github.com/ieasybooks/tahweel.rb/issues.\e[0m"
22
+ end
18
23
  end
19
24
 
20
25
  begin
@@ -27,7 +32,7 @@ begin
27
32
  exit 0
28
33
  end
29
34
 
30
- Tahweel::Authorizer.authorize if options[:file_concurrency] > 1 && options[:processor] == :google_drive
35
+ Tahweel::Authorizer.authorize if options[:processor] == :google_drive
31
36
 
32
37
  base_path = File.directory?(input_path) ? input_path : File.dirname(input_path)
33
38
 
data/bin/tahweel-ui ADDED
@@ -0,0 +1,300 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ # Add the ../lib directory to the load path so we can require 'tahweel'
5
+ $LOAD_PATH.unshift(File.expand_path("../lib", __dir__))
6
+
7
+ require "matrix"
8
+ require "glimmer-dsl-libui"
9
+ require "launchy"
10
+ require "tahweel"
11
+
12
+ Tahweel::Authorizer.authorize unless defined?(Ocran)
13
+
14
+ class TahweelApp # rubocop:disable Metrics/ClassLength,Style/Documentation
15
+ include Glimmer::LibUI::Application
16
+
17
+ TRANSLATIONS = {
18
+ # https://www.perplexity.ai/search/add-direction-unicode-controls-Z5haIOFEQZeCbG5B8Wt2dg#3
19
+ ar: {
20
+ window_title: "تحويل",
21
+ title_label: "‫تحويل: حوّل الملفات من صيغة ‪PDF‬ إلى ‪TXT‬ و ‪DOCX‬ ↓‬",
22
+ note_label: "‫ملاحظة: يدعم تحويل الملفات بصيغة ‪PDF‬ أو صورة (‪JPG‬ و ‪JPEG‬ و ‪PNG‬) فقط.‬",
23
+ file_btn: "تحويل ملف واحد",
24
+ folder_btn: "تحويل مجلد كامل",
25
+ language_btn: "English",
26
+ global_progress: "التقدم العام:",
27
+ file_progress: "تقدم الملف الحالي:",
28
+ status_done: "انتهى التحويل.",
29
+ msg_success_title: "اكتمل التحويل",
30
+ msg_error_title: "خطأ",
31
+ msg_no_files: "لم نعثر على ملفات لتحويلها.",
32
+ msg_bad_extension: "صيغة الملف غير مدعومة.",
33
+ stage_preparing: "جارٍ التحضير...",
34
+ stage_splitting: "جارٍ تقسيم الملف...",
35
+ stage_ocr: "جارٍ استخراج النصوص...",
36
+ stage_done: "انتهى"
37
+ },
38
+ en: {
39
+ window_title: "Tahweel",
40
+ title_label: "Tahweel: Convert PDF files to TXT and DOCX ↓",
41
+ note_label: "Note: Tahweel supports PDF or image files (JPG, JPEG, and PNG) only.",
42
+ file_btn: "Convert a Single File",
43
+ folder_btn: "Convert a Folder",
44
+ language_btn: "العربية",
45
+ global_progress: "Progress:",
46
+ file_progress: "Current File:",
47
+ status_done: "Conversion complete.",
48
+ msg_success_title: "Conversion Complete",
49
+ msg_error_title: "Error",
50
+ msg_no_files: "No files found to convert.",
51
+ msg_bad_extension: "Unsupported file format.",
52
+ stage_preparing: "Preparing...",
53
+ stage_splitting: "Splitting file...",
54
+ stage_ocr: "Extracting text...",
55
+ stage_done: "Done"
56
+ }
57
+ }.freeze
58
+
59
+ def initialize(*args)
60
+ @lang = :ar
61
+ @rtl_components = []
62
+ @ltr_components = []
63
+
64
+ super
65
+ end
66
+
67
+ body do
68
+ @main_window = window(t(:window_title)) do
69
+ margined true
70
+
71
+ vertical_box do
72
+ @header_label = right_aligned_label(t(:title_label))
73
+ convert_buttons
74
+ @note_label = right_aligned_label(t(:note_label))
75
+
76
+ @progress_section = progress_section
77
+ @progress_section.visible = false
78
+
79
+ horizontal_separator { stretchy false }
80
+
81
+ @language_btn = language_button
82
+ end
83
+ end
84
+ end
85
+
86
+ private
87
+
88
+ def language_button # rubocop:disable Metrics/MethodLength
89
+ ref = nil
90
+
91
+ horizontal_box do
92
+ stretchy false
93
+
94
+ right_alignment_label = label("") { stretchy true }
95
+ right_alignment_label.visible = false
96
+ @rtl_components << right_alignment_label
97
+
98
+ ref = button(t(:language_btn)) do
99
+ stretchy false
100
+ on_clicked { toggle_language }
101
+ end
102
+
103
+ left_alignment_label = label("") { stretchy true }
104
+ @ltr_components << left_alignment_label
105
+ end
106
+
107
+ ref
108
+ end
109
+
110
+ def convert_buttons
111
+ horizontal_box do
112
+ @ar_convert_folder_btn = button(t(:folder_btn, lang: :ar)) { on_clicked { on_folder_click } }
113
+ @rtl_components << @ar_convert_folder_btn
114
+
115
+ @convert_file_btn = button(t(:file_btn)) { on_clicked { on_file_click } }
116
+
117
+ @en_convert_folder_btn = button(t(:folder_btn, lang: :en)) { on_clicked { on_folder_click } }
118
+ @en_convert_folder_btn.visible = false
119
+ @rtl_components << @en_convert_folder_btn
120
+ end
121
+ end
122
+
123
+ def progress_section
124
+ group do
125
+ stretchy false
126
+
127
+ vertical_box do
128
+ @global_progress_label = right_aligned_label(t(:global_progress))
129
+ @global_progress = progress_bar { stretchy true }
130
+
131
+ @file_progress_label = right_aligned_label(t(:file_progress))
132
+ @file_progress = progress_bar { stretchy true }
133
+ end
134
+ end
135
+ end
136
+
137
+ def on_file_click
138
+ file = open_file
139
+ convert(File.dirname(file), [file]) if file && valid_file?(file)
140
+ end
141
+
142
+ def valid_file?(file)
143
+ return true if %w[.pdf .jpg .jpeg .png].include? File.extname(file).downcase
144
+
145
+ msg_box_error(t(:msg_error_title), t(:msg_bad_extension))
146
+ false
147
+ end
148
+
149
+ def on_folder_click
150
+ folder = open_folder
151
+ paths = collect_files(folder)
152
+ convert(folder, paths) if paths.any?
153
+ end
154
+
155
+ def collect_files(folder)
156
+ paths = folder ? Tahweel::CLI::FileCollector.collect(folder) : []
157
+ return paths if folder && paths.any?
158
+
159
+ msg_box_error(t(:msg_error_title), t(:msg_no_files))
160
+ []
161
+ end
162
+
163
+ def convert(folder, paths)
164
+ disable_window
165
+ @progress_section.visible = true
166
+
167
+ Thread.new do
168
+ paths.each_with_index { |path, index| process_path(path, index, paths.size) }
169
+ finish_conversion(folder, paths.size)
170
+ end
171
+ end
172
+
173
+ def disable_window
174
+ @convert_file_btn.enabled = false
175
+ @ar_convert_folder_btn.enabled = false
176
+ @en_convert_folder_btn.enabled = false
177
+ @language_btn.enabled = false
178
+ end
179
+
180
+ def process_path(path, index, total_files)
181
+ reset_file_progress(index, total_files)
182
+
183
+ Tahweel::CLI::FileProcessor.process(path, options(path)) { update_file_progress(_1) }
184
+ end
185
+
186
+ def reset_file_progress(index, total_files)
187
+ Glimmer::LibUI.queue_main do
188
+ @global_progress.value = ((index.to_f / total_files) * 100).to_i
189
+ @global_progress_label.text = "\u202B#{t(:global_progress)} (\u202A#{index}/#{total_files}\u202C)\u202C"
190
+ @file_progress.value = 0
191
+ @file_progress_label.text = "#{t(:file_progress)} #{t(:stage_preparing)}"
192
+ end
193
+ end
194
+
195
+ def options(path)
196
+ {
197
+ dpi: 150,
198
+ processor: :google_drive,
199
+ ocr_concurrency: Tahweel::Converter::DEFAULT_CONCURRENCY,
200
+ file_concurrency: 1,
201
+ formats: %i[txt docx],
202
+ base_input_path: File.directory?(path) ? path : File.dirname(path)
203
+ }
204
+ end
205
+
206
+ def update_file_progress(progress)
207
+ Glimmer::LibUI.queue_main do
208
+ @file_progress.value = progress[:percentage].to_i
209
+ @file_progress_label.text = "#{t(:file_progress)} (#{stage_text(progress[:stage])})"
210
+ end
211
+ end
212
+
213
+ def stage_text(stage)
214
+ case stage
215
+ when :splitting then t(:stage_splitting)
216
+ when :ocr then t(:stage_ocr)
217
+ end
218
+ end
219
+
220
+ def finish_conversion(folder, total_files)
221
+ Glimmer::LibUI.queue_main do
222
+ @global_progress.value = 100
223
+ @file_progress.value = 100
224
+ @global_progress_label.text = "#{t(:global_progress)} (#{total_files}/#{total_files})"
225
+ @file_progress_label.text = "#{t(:file_progress)} (#{t(:stage_done)})"
226
+
227
+ enable_window
228
+ msg_box(t(:msg_success_title), convert_finished_message(total_files))
229
+ end
230
+
231
+ Launchy.open(folder)
232
+ end
233
+
234
+ def enable_window
235
+ @convert_file_btn.enabled = true
236
+ @ar_convert_folder_btn.enabled = true
237
+ @en_convert_folder_btn.enabled = true
238
+ @language_btn.enabled = true
239
+ end
240
+
241
+ def convert_finished_message(files_count)
242
+ if @lang == :en
243
+ "Finished converting #{files_count} file(s) successfully."
244
+ else
245
+ arabic_convert_finished_message(files_count)
246
+ end
247
+ end
248
+
249
+ def arabic_convert_finished_message(files_count)
250
+ return "انتهى تحويل ملف واحد بنجاح." if files_count == 1
251
+ return "انتهى تحويل ملفين بنجاح." if files_count == 2
252
+
253
+ last_two_digits = files_count % 100
254
+
255
+ suffix = case last_two_digits
256
+ when 0..2 then "ملف"
257
+ when 3..10 then "ملفات"
258
+ else "ملفًا"
259
+ end
260
+
261
+ "انتهى تحويل #{files_count} #{suffix} بنجاح."
262
+ end
263
+
264
+ def toggle_language # rubocop:disable Metrics/AbcSize
265
+ @lang = @lang == :ar ? :en : :ar
266
+
267
+ @main_window.title = t(:window_title)
268
+ @header_label.text = t(:title_label)
269
+ @note_label.text = t(:note_label)
270
+ @convert_file_btn.text = t(:file_btn)
271
+ @language_btn.text = t(:language_btn)
272
+
273
+ @global_progress_label.text = t(:global_progress)
274
+ @file_progress_label.text = t(:file_progress)
275
+
276
+ @rtl_components.each { _1.visible = !_1.visible }
277
+ @ltr_components.each { _1.visible = !_1.visible }
278
+ end
279
+
280
+ def t(key, lang: nil) = TRANSLATIONS[lang || @lang][key]
281
+
282
+ def right_aligned_label(text)
283
+ ref = nil
284
+
285
+ horizontal_box do
286
+ right_alignment_label = label("") { stretchy true }
287
+ @rtl_components << right_alignment_label
288
+
289
+ ref = label(text) { stretchy false }
290
+
291
+ left_alignment_label = label("") { stretchy true }
292
+ left_alignment_label.visible = false
293
+ @ltr_components << left_alignment_label
294
+ end
295
+
296
+ ref
297
+ end
298
+ end
299
+
300
+ TahweelApp.launch unless defined?(Ocran)
@@ -1,12 +1,13 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "google/apis/drive_v3"
4
- require "googleauth"
5
- require "googleauth/stores/file_token_store"
3
+ require "fileutils"
6
4
  require "socket"
7
5
  require "uri"
6
+
7
+ require "googleauth"
8
+ require "googleauth/stores/file_token_store"
9
+ require "google/apis/drive_v3"
8
10
  require "launchy"
9
- require "fileutils"
10
11
  require "xdg"
11
12
 
12
13
  module Tahweel
@@ -104,7 +105,16 @@ module Tahweel
104
105
  end
105
106
 
106
107
  # Opens the system default browser to the Google Authorization URL.
107
- def open_browser_for_auth = Launchy.open(@authorizer.get_authorization_url(base_url: REDIRECT_URI))
108
+ def open_browser_for_auth
109
+ url = @authorizer.get_authorization_url(base_url: REDIRECT_URI)
110
+
111
+ if Gem.win_platform?
112
+ # https://github.com/copiousfreetime/launchy/issues/167
113
+ system("start \"\" \"#{url}\"")
114
+ else
115
+ Launchy.open(url)
116
+ end
117
+ end
108
118
 
109
119
  # Listens on the local server for the OAuth callback request.
110
120
  # Handles multiple incoming requests to filter out noise (like favicon.ico).
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "fileutils"
3
4
  require "pathname"
4
5
 
5
6
  module Tahweel
@@ -17,7 +18,7 @@ module Tahweel
17
18
  # @option options [String] :output The directory to save output files (defaults to current directory).
18
19
  # @option options [Integer] :dpi DPI for PDF conversion (defaults to 150).
19
20
  # @option options [Symbol] :processor The OCR processor to use (e.g., :google_drive).
20
- # @option options [Integer] :page_concurrency Max concurrent operations.
21
+ # @option options [Integer] :ocr_concurrency Max concurrent OCR operations.
21
22
  # @option options [Array<Symbol>] :formats Output formats (e.g., [:txt, :docx]).
22
23
  # @option options [String] :page_separator Separator string for TXT output.
23
24
  # @option options [String] :base_input_path The base path used to determine relative output structure.
@@ -66,8 +67,11 @@ module Tahweel
66
67
 
67
68
  private
68
69
 
70
+ # Creates the output directory if it doesn't exist.
69
71
  def ensure_output_directory_exists = FileUtils.mkdir_p(output_directory)
70
72
 
73
+ # Checks if all requested output formats already exist.
74
+ # @return [Boolean] True if all output files exist.
71
75
  def all_outputs_exist?
72
76
  @options[:formats].all? do |format|
73
77
  extension = Tahweel::Writer.new(format: format).extension
@@ -75,22 +79,29 @@ module Tahweel
75
79
  end
76
80
  end
77
81
 
82
+ # Checks if the input file is a PDF.
83
+ # @return [Boolean]
78
84
  def pdf? = File.extname(@file_path).downcase == ".pdf"
79
85
 
86
+ # Handles PDF processing: splitting, OCR, and saving.
87
+ # @param &block [Proc] Progress callback.
80
88
  def process_pdf(&)
81
89
  texts = Tahweel.convert(
82
90
  @file_path,
83
91
  dpi: @options[:dpi],
84
92
  processor: @options[:processor],
85
- concurrency: @options.fetch(:page_concurrency, Tahweel::Converter::DEFAULT_CONCURRENCY),
93
+ concurrency: @options.fetch(:ocr_concurrency, Tahweel::Converter::DEFAULT_CONCURRENCY),
86
94
  &
87
95
  )
88
96
 
89
97
  write_output(texts)
90
98
  end
91
99
 
100
+ # Handles single image processing.
92
101
  def process_image = write_output([Tahweel.extract(@file_path, processor: @options[:processor])])
93
102
 
103
+ # Writes the extracted text to all configured formats.
104
+ # @param texts [Array<String>] The list of extracted texts (per page).
94
105
  def write_output(texts)
95
106
  Tahweel::Writer.write(
96
107
  texts,
@@ -100,6 +111,7 @@ module Tahweel
100
111
  )
101
112
  end
102
113
 
114
+ # @return [String] The full path for output files without extension.
103
115
  def base_output_path = File.join(output_directory, File.basename(@file_path, ".*"))
104
116
 
105
117
  # Determines the output directory.
@@ -3,6 +3,13 @@
3
3
  require "etc"
4
4
  require "optparse"
5
5
 
6
+ require_relative "../version"
7
+ require_relative "../converter"
8
+ require_relative "../ocr"
9
+ require_relative "../writer"
10
+ require_relative "../writers/txt"
11
+ require_relative "file_collector"
12
+
6
13
  module Tahweel
7
14
  module CLI
8
15
  # Parses command-line arguments for the Tahweel CLI.
@@ -26,11 +33,13 @@ module Tahweel
26
33
  options
27
34
  end
28
35
 
36
+ # Returns the default configuration options.
37
+ # @return [Hash] Default options.
29
38
  def self.default_options
30
39
  {
31
40
  dpi: 150,
32
41
  processor: :google_drive,
33
- page_concurrency: Tahweel::Converter::DEFAULT_CONCURRENCY,
42
+ ocr_concurrency: Tahweel::Converter::DEFAULT_CONCURRENCY,
34
43
  file_concurrency: (Etc.nprocessors - 2).clamp(2..),
35
44
  output: nil,
36
45
  formats: %i[txt docx],
@@ -38,6 +47,10 @@ module Tahweel
38
47
  }
39
48
  end
40
49
 
50
+ # Configures the OptionParser instance.
51
+ #
52
+ # @param opts [OptionParser] The parser instance.
53
+ # @param options [Hash] The options hash to populate.
41
54
  def self.configure_parser(opts, options) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
42
55
  opts.program_name = "tahweel"
43
56
  opts.version = Tahweel::VERSION
@@ -69,17 +82,17 @@ module Tahweel
69
82
  end
70
83
 
71
84
  opts.on(
72
- "-P", "--page-concurrency PAGE_CONCURRENCY", POSITIVE_INTEGER,
73
- "Max concurrent OCR operations (default: #{options[:page_concurrency]})"
85
+ "-F", "--file-concurrency FILE_CONCURRENCY", POSITIVE_INTEGER,
86
+ "Max concurrent files to process (default: CPUs - 2 = #{options[:file_concurrency]})"
74
87
  ) do |value|
75
- options[:page_concurrency] = value
88
+ options[:file_concurrency] = value
76
89
  end
77
90
 
78
91
  opts.on(
79
- "-F", "--file-concurrency FILE_CONCURRENCY", POSITIVE_INTEGER,
80
- "Max concurrent files to process (default: CPUs - 2 = #{options[:file_concurrency]})"
92
+ "-O", "--ocr-concurrency OCR_CONCURRENCY", POSITIVE_INTEGER,
93
+ "Max concurrent OCR operations (default: #{options[:ocr_concurrency]})"
81
94
  ) do |value|
82
- options[:file_concurrency] = value
95
+ options[:ocr_concurrency] = value
83
96
  end
84
97
 
85
98
  opts.on(
@@ -104,6 +117,10 @@ module Tahweel
104
117
  end
105
118
  end
106
119
 
120
+ # Validates that arguments were provided.
121
+ #
122
+ # @param args [Array<String>] The remaining arguments after parsing.
123
+ # @param parser [OptionParser] The parser instance for printing help.
107
124
  def self.validate_args!(args, parser)
108
125
  return unless args.empty?
109
126
 
@@ -102,6 +102,7 @@ module Tahweel
102
102
 
103
103
  private
104
104
 
105
+ # Starts a background thread to refresh the display periodically.
105
106
  def start_ticker
106
107
  @ticker_thread = Thread.new do
107
108
  while @running
@@ -111,6 +112,7 @@ module Tahweel
111
112
  end
112
113
  end
113
114
 
115
+ # Renders the progress dashboard to stdout.
114
116
  def render # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
115
117
  # Move cursor up to the start of our block
116
118
  $stdout.print "\e[#{@concurrency + 1}A"
@@ -138,6 +140,11 @@ module Tahweel
138
140
  $stdout.flush
139
141
  end
140
142
 
143
+ # Truncates a file path to fit within the display.
144
+ #
145
+ # @param path [String] The file path.
146
+ # @param max_length [Integer] Maximum allowed length.
147
+ # @return [String] The truncated path.
141
148
  def truncate_path(path, max_length)
142
149
  return path.ljust(max_length) if path.length <= max_length
143
150
 
@@ -1,8 +1,9 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "fileutils"
4
+
3
5
  require_relative "pdf_splitter"
4
6
  require_relative "ocr"
5
- require "fileutils"
6
7
 
7
8
  module Tahweel
8
9
  # Orchestrates the full conversion process:
@@ -60,10 +61,10 @@ module Tahweel
60
61
  # }
61
62
  # @return [Array<String>] An array containing the text of each page.
62
63
  def convert(&)
63
- image_paths, temp_dir = PdfSplitter.split(@pdf_path, dpi: @dpi, &).values_at(:image_paths, :folder_path)
64
+ images_paths, temp_dir = PdfSplitter.split(@pdf_path, dpi: @dpi, &).values_at(:images_paths, :folder_path)
64
65
 
65
66
  begin
66
- process_images(image_paths, Ocr.new(processor: @processor_type), &)
67
+ process_images(images_paths, Ocr.new(processor: @processor_type), &)
67
68
  ensure
68
69
  FileUtils.rm_rf(temp_dir)
69
70
  end
@@ -71,31 +72,55 @@ module Tahweel
71
72
 
72
73
  private
73
74
 
74
- def process_images(image_paths, ocr_engine, &)
75
- texts = Array.new(image_paths.size)
75
+ # Processes the list of images concurrently using the specified OCR engine.
76
+ #
77
+ # @param images_paths [Array<String>] List of paths to the image files.
78
+ # @param ocr_engine [Tahweel::Ocr] The initialized OCR engine instance.
79
+ # @param &block [Proc] Block to yield progress updates.
80
+ # @return [Array<String>] The text extracted from the images.
81
+ def process_images(images_paths, ocr_engine, &)
82
+ texts = Array.new(images_paths.size)
76
83
  mutex = Mutex.new
77
84
  processed_count = 0
78
85
 
79
- run_workers(build_queue(image_paths), ocr_engine, texts, mutex) do
86
+ run_workers(build_queue(images_paths), ocr_engine, texts, mutex) do
80
87
  processed_count += 1
81
- report_progress(processed_count, image_paths.size, &)
88
+ report_progress(processed_count, images_paths.size, &)
82
89
  end
83
90
 
84
91
  texts
85
92
  end
86
93
 
87
- def build_queue(image_paths)
94
+ # Builds a queue of images paths and their indices.
95
+ #
96
+ # @param images_paths [Array<String>] List of image paths.
97
+ # @return [Queue] A queue containing [path, index] tuples.
98
+ def build_queue(images_paths)
88
99
  queue = Queue.new
89
- image_paths.each_with_index { |path, index| queue << [path, index] }
100
+ images_paths.each_with_index { |path, index| queue << [path, index] }
90
101
  queue
91
102
  end
92
103
 
104
+ # Spawns worker threads to process items from the queue.
105
+ #
106
+ # @param queue [Queue] The queue of images to process.
107
+ # @param ocr_engine [Tahweel::Ocr] The OCR engine.
108
+ # @param texts [Array<String>] Shared array to store results.
109
+ # @param mutex [Mutex] Mutex for thread-safe updates.
110
+ # @param &block [Proc] Block to yield progress updates.
93
111
  def run_workers(queue, ocr_engine, texts, mutex, &)
94
112
  Array.new(@concurrency) do
95
113
  Thread.new { process_queue_items(queue, ocr_engine, texts, mutex, &) }
96
114
  end.each(&:join)
97
115
  end
98
116
 
117
+ # Processing loop for a single worker thread.
118
+ #
119
+ # @param queue [Queue] The shared queue.
120
+ # @param ocr_engine [Tahweel::Ocr] The OCR engine.
121
+ # @param texts [Array<String>] Shared result array.
122
+ # @param mutex [Mutex] Synchronization primitive.
123
+ # @param &block [Proc] Block to yield progress updates.
99
124
  def process_queue_items(queue, ocr_engine, texts, mutex, &)
100
125
  loop do
101
126
  begin
@@ -109,6 +134,13 @@ module Tahweel
109
134
  end
110
135
  end
111
136
 
137
+ # Thread-safe saving of OCR results.
138
+ #
139
+ # @param texts [Array<String>] The results array.
140
+ # @param index [Integer] Index of the current page.
141
+ # @param text [String] Extracted text.
142
+ # @param mutex [Mutex] Synchronization primitive.
143
+ # @yield Executes the progress reporting block within the lock.
112
144
  def save_result(texts, index, text, mutex)
113
145
  mutex.synchronize do
114
146
  texts[index] = text
@@ -116,6 +148,11 @@ module Tahweel
116
148
  end
117
149
  end
118
150
 
151
+ # Reports progress to the optional block.
152
+ #
153
+ # @param processed [Integer] Number of pages processed.
154
+ # @param total [Integer] Total number of pages.
155
+ # @yield [Hash] Progress information.
119
156
  def report_progress(processed, total)
120
157
  return unless block_given?
121
158
 
@@ -1,14 +1,15 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "etc"
3
4
  require "fileutils"
4
- require "rbconfig"
5
5
  require "securerandom"
6
6
  require "tmpdir"
7
- require "vips"
7
+
8
+ require_relative "poppler_installer"
8
9
 
9
10
  module Tahweel
10
11
  # Handles the logic for splitting a PDF file into individual image pages.
11
- # Uses the libvips library for high-performance image processing.
12
+ # Uses Poppler utils (pdftoppm, pdfinfo) for high-performance image processing.
12
13
  class PdfSplitter
13
14
  # Default DPI used when converting PDF pages to images.
14
15
  # 150 DPI is a good balance between quality and file size for general documents.
@@ -25,7 +26,7 @@ module Tahweel
25
26
  # percentage: Float,
26
27
  # remaining_pages: Integer
27
28
  # }
28
- # @return [Hash] A hash containing the :folder_path (String) and :image_paths (Array<String>).
29
+ # @return [Hash] A hash containing the :folder_path (String) and :images_paths (Array<String>).
29
30
  def self.split(pdf_path, dpi: DEFAULT_DPI, &) = new(pdf_path, dpi:).split(&)
30
31
 
31
32
  # Initializes a new PdfSplitter instance.
@@ -35,13 +36,12 @@ module Tahweel
35
36
  def initialize(pdf_path, dpi: DEFAULT_DPI)
36
37
  @pdf_path = pdf_path
37
38
  @dpi = dpi
38
- @image_paths = []
39
39
  end
40
40
 
41
41
  # Executes the PDF splitting process.
42
42
  #
43
43
  # This method performs the following steps:
44
- # 1. Checks if libvips is installed (skips on Windows).
44
+ # 1. Checks if Poppler utils are available (installs if missing on Windows).
45
45
  # 2. Validates the existence of the source PDF file.
46
46
  # 3. Creates a unique temporary directory for output.
47
47
  # 4. Iterates through each page of the PDF and converts it to a PNG image.
@@ -55,12 +55,11 @@ module Tahweel
55
55
  # }
56
56
  # @return [Hash] Result hash with keys:
57
57
  # - :folder_path [String] The absolute path to the temporary directory containing the images.
58
- # - :image_paths [Array<String>] List of absolute paths for each generated image file.
59
- # @raise [RuntimeError] If the PDF file is not found or libvips is missing.
60
- # @raise [Vips::Error] If the underlying VIPS library encounters an error during processing.
58
+ # - :images_paths [Array<String>] List of absolute paths for each generated image file.
59
+ # @raise [RuntimeError] If the PDF file is not found.
61
60
  def split(&)
62
- check_libvips_installed!
63
61
  validate_file_exists!
62
+ PopplerInstaller.ensure_installed!
64
63
  setup_output_directory
65
64
  process_pages(&)
66
65
  result
@@ -68,20 +67,7 @@ module Tahweel
68
67
 
69
68
  private
70
69
 
71
- attr_reader :pdf_path, :dpi, :image_paths, :output_dir
72
-
73
- # Checks if the `vips` CLI tool is available in the system PATH.
74
- # Skips this check on Windows systems, assuming the environment is managed differently.
75
- # Aborts execution with an error message if vips is missing.
76
- def check_libvips_installed!
77
- return if /mswin|mingw|cygwin/.match?(RbConfig::CONFIG["host_os"])
78
- return if system("vips --version", out: File::NULL, err: File::NULL)
79
-
80
- abort "Error: libvips is not installed. Please install it before using Tahweel.\n" \
81
- "MacOS: `brew install vips`\n" \
82
- "Ubuntu: `sudo apt install libvips42`\n" \
83
- "Windows: Already installed with the Tahweel gem"
84
- end
70
+ attr_reader :pdf_path, :dpi, :output_dir
85
71
 
86
72
  # Ensures the source PDF file actually exists.
87
73
  # @raise [RuntimeError] if the file is missing.
@@ -106,33 +92,101 @@ module Tahweel
106
92
  # }
107
93
  # @return [void]
108
94
  def process_pages(&)
109
- total_pages.times do |i|
110
- extract_page(i)
95
+ mutex = Mutex.new
96
+ processed_count = 0
97
+
98
+ run_workers(build_queue, mutex) do
99
+ processed_count += 1
100
+ report_progress(processed_count, &)
101
+ end
102
+ end
103
+
104
+ # Builds a queue containing all page indices to be processed.
105
+ # @return [Queue] The queue populated with page numbers.
106
+ def build_queue
107
+ queue = Queue.new
108
+ total_pages.times { queue << _1 }
109
+ queue
110
+ end
111
111
 
112
- next unless block_given?
112
+ # Spawns and manages worker threads to process the queue.
113
+ #
114
+ # @param queue [Queue] The queue of pages to process.
115
+ # @param mutex [Mutex] Synchronization primitive for thread safety.
116
+ # @param &block [Proc] Block to execute when a page is processed.
117
+ def run_workers(queue, mutex, &)
118
+ concurrency = (Etc.nprocessors - 2).clamp(2..)
119
+
120
+ Array.new([concurrency, total_pages].min) do
121
+ Thread.new { process_queue_items(queue, mutex, &) }
122
+ end.each(&:join)
123
+ end
113
124
 
114
- yield({
115
- file_path: @pdf_path, stage: :splitting,
116
- current_page: i + 1,
117
- percentage: (((i + 1).to_f / total_pages) * 100).round(2),
118
- remaining_pages: total_pages - (i + 1)
119
- })
125
+ # Processing loop for individual worker threads.
126
+ #
127
+ # @param queue [Queue] The shared queue of pages.
128
+ # @param mutex [Mutex] Synchronization primitive.
129
+ # @param &block [Proc] Block to yield for progress updates.
130
+ def process_queue_items(queue, mutex, &)
131
+ loop do
132
+ begin
133
+ page_num = queue.pop(true)
134
+ rescue ThreadError
135
+ break
136
+ end
137
+
138
+ extract_page(page_num)
139
+
140
+ mutex.synchronize(&)
120
141
  end
121
142
  end
122
143
 
144
+ # Reports progress back to the caller.
145
+ #
146
+ # @param processed [Integer] Number of pages processed so far.
147
+ # @param &block [Proc] The progress callback block.
148
+ def report_progress(processed, &)
149
+ return unless block_given?
150
+
151
+ yield({
152
+ file_path: @pdf_path, stage: :splitting,
153
+ current_page: processed,
154
+ percentage: ((processed.to_f / total_pages) * 100).round(2),
155
+ remaining_pages: total_pages - processed
156
+ })
157
+ end
158
+
123
159
  # Calculates the total number of pages in the PDF by parsing the `Pages:` field of the `pdfinfo` output.
124
160
  # @return [Integer] The page count.
125
161
  def total_pages
126
- @total_pages ||= Vips::Image.pdfload(pdf_path, page: 0, dpi: dpi, access: :sequential).get("pdf-n_pages")
162
+ @total_pages ||= begin
163
+ output = `#{PopplerInstaller.pdfinfo_path} "#{pdf_path}"`.encode(
164
+ "UTF-8",
165
+ invalid: :replace, undef: :replace, replace: ""
166
+ )
167
+
168
+ pages = output[/Pages:\s*(\d+)/, 1]
169
+ raise "Failed to get page count from PDF: #{output}" unless pages
170
+
171
+ pages.to_i
172
+ end
127
173
  end
128
174
 
129
175
  # Extracts a specific page from the PDF and saves it as a PNG.
130
176
  #
131
177
  # @param page_num [Integer] The zero-based index of the page to extract.
132
178
  def extract_page(page_num)
133
- output_path = File.join(output_dir, "page_#{page_num + 1}.png")
134
- Vips::Image.pdfload(pdf_path, page: page_num, dpi: dpi, access: :sequential).write_to_file(output_path)
135
- image_paths << output_path
179
+ output_prefix = File.join(output_dir, "page")
180
+
181
+ system(
182
+ PopplerInstaller.pdftoppm_path,
183
+ "-png",
184
+ "-r", dpi.to_s,
185
+ "-f", (page_num + 1).to_s,
186
+ "-l", (page_num + 1).to_s,
187
+ pdf_path,
188
+ output_prefix
189
+ )
136
190
  end
137
191
 
138
192
  # Constructs the final result hash.
@@ -140,7 +194,7 @@ module Tahweel
140
194
  def result
141
195
  {
142
196
  folder_path: output_dir,
143
- image_paths: image_paths
197
+ images_paths: Dir.glob(File.join(output_dir, "page-*.png")).sort!
144
198
  }
145
199
  end
146
200
  end
@@ -0,0 +1,185 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "fileutils"
4
+ require "json"
5
+ require "net/http"
6
+ require "open-uri"
7
+ require "uri"
8
+
9
+ require "xdg"
10
+ require "zip"
11
+
12
+ module Tahweel
13
+ # Handles the installation and path resolution for Poppler utilities.
14
+ #
15
+ # On Windows, this class can automatically download and install the necessary
16
+ # binaries if they are not present. On other platforms, it provides instructions
17
+ # for manual installation.
18
+ class PopplerInstaller
19
+ POPPLER_REPO_API = "https://api.github.com/repos/oschwartz10612/poppler-windows/releases/latest"
20
+
21
+ # Ensures that Poppler utilities are installed.
22
+ #
23
+ # On Windows: Installs Poppler locally if not found.
24
+ # On other platforms: Aborts with an error message if Poppler is missing.
25
+ #
26
+ # @raise [SystemExit] if Poppler is missing on non-Windows platforms.
27
+ def self.ensure_installed! # rubocop:disable Metrics/MethodLength
28
+ installer = new
29
+ return if installer.installed?
30
+
31
+ if Gem.win_platform?
32
+ installer.install
33
+ else
34
+ abort <<~MSG
35
+ Error: Poppler utilities are not installed. Please install them:
36
+ MacOS: `brew install poppler`
37
+ Ubuntu: `sudo apt install poppler-utils`
38
+ MSG
39
+ end
40
+ end
41
+
42
+ # Returns the path to the `pdftoppm` executable.
43
+ # @return [String] path to the executable.
44
+ def self.pdftoppm_path = new.pdftoppm_path
45
+
46
+ # Returns the path to the `pdfinfo` executable.
47
+ # @return [String] path to the executable.
48
+ def self.pdfinfo_path = new.pdfinfo_path
49
+
50
+ # Installs Poppler binaries on Windows.
51
+ #
52
+ # Downloads the latest release from GitHub and extracts it to the cache directory.
53
+ # Does nothing if already installed.
54
+ def install
55
+ zip_path = nil
56
+ return if installed?
57
+
58
+ zip_path = download_release_file
59
+ extract_zip_file(zip_path)
60
+ ensure
61
+ FileUtils.rm_f(zip_path) if zip_path
62
+ end
63
+
64
+ # Checks if Poppler utilities are available.
65
+ #
66
+ # @return [Boolean] true if `pdftoppm` and `pdfinfo` are in the PATH or cached.
67
+ def installed? = (command_exists?("pdftoppm") && command_exists?("pdfinfo")) || cached?
68
+
69
+ # Checks if Poppler binaries are present in the local cache (Windows only).
70
+ #
71
+ # @return [Boolean] true if cached binaries exist.
72
+ def cached?
73
+ return false unless Gem.win_platform?
74
+
75
+ File.exist?(File.join(cached_bin_path, "pdftoppm.exe"))
76
+ end
77
+
78
+ # Resolves the path to the `pdftoppm` executable.
79
+ #
80
+ # Prioritizes the system PATH, falling back to the cached version on Windows.
81
+ #
82
+ # @return [String] path to `pdftoppm`.
83
+ def pdftoppm_path
84
+ return "pdftoppm" if command_exists?("pdftoppm")
85
+
86
+ Gem.win_platform? ? File.join(cached_bin_path, "pdftoppm.exe") : nil
87
+ end
88
+
89
+ # Resolves the path to the `pdfinfo` executable.
90
+ #
91
+ # Prioritizes the system PATH, falling back to the cached version on Windows.
92
+ #
93
+ # @return [String] path to `pdfinfo`.
94
+ def pdfinfo_path
95
+ return "pdfinfo" if command_exists?("pdfinfo")
96
+
97
+ Gem.win_platform? ? File.join(cached_bin_path, "pdfinfo.exe") : nil
98
+ end
99
+
100
+ private
101
+
102
+ # Locates the `bin` directory within the cached Poppler installation.
103
+ #
104
+ # Searches for a directory matching "poppler-*" in the cache directory and returns
105
+ # the path to its `Library/bin` subdirectory.
106
+ #
107
+ # @return [String] Path to the `bin` directory, or an empty string if not found.
108
+ def cached_bin_path
109
+ poppler_root = Dir.glob(File.join(cache_dir, "poppler-*")).first
110
+ return "" unless poppler_root
111
+
112
+ File.join(poppler_root, "Library", "bin")
113
+ end
114
+
115
+ # Checks if a command is available in the system path.
116
+ #
117
+ # @param cmd [String] The command to check for.
118
+ # @return [Boolean] true if the command exists in the PATH.
119
+ def command_exists?(cmd)
120
+ tool = Gem.win_platform? ? "where" : "which"
121
+ system("#{tool} #{cmd} > #{File::NULL} 2>&1")
122
+ end
123
+
124
+ # Downloads the latest Poppler release zip file.
125
+ #
126
+ # Fetches the download URL from the GitHub API and saves the file to the cache directory.
127
+ #
128
+ # @return [String] The local path to the downloaded zip file.
129
+ def download_release_file
130
+ release_url = latest_release_url
131
+ zip_path = File.join(cache_dir, File.basename(release_url))
132
+ URI.parse(release_url).open { File.binwrite(zip_path, _1.read) }
133
+ zip_path
134
+ end
135
+
136
+ # Retrieves the download URL for the latest Windows release of Poppler.
137
+ #
138
+ # Queries the GitHub API for the latest release and finds the asset matching "Release*.zip".
139
+ #
140
+ # @return [String] The download URL of the asset.
141
+ # @raise [SystemExit] if the API request fails or no valid asset is found.
142
+ def latest_release_url # rubocop:disable Metrics/AbcSize
143
+ uri = URI(POPPLER_REPO_API)
144
+ request = Net::HTTP::Get.new(uri)
145
+ request["User-Agent"] = "Tahweel-Gem"
146
+
147
+ response = Net::HTTP.start(uri.hostname, uri.port, use_ssl: true) { _1.request(request) }
148
+
149
+ unless response.is_a?(Net::HTTPSuccess)
150
+ abort "Failed to fetch Poppler release info: #{response.code} #{response.message}"
151
+ end
152
+
153
+ asset = JSON.parse(response.body)["assets"].find { _1["name"].match?(/^Release.*\.zip$/) }
154
+
155
+ asset ? asset["browser_download_url"] : abort("No valid Windows release found for Poppler.")
156
+ end
157
+
158
+ # Extracts the downloaded zip file to the cache directory.
159
+ #
160
+ # @param zip_path [String] Path to the zip file to extract.
161
+ def extract_zip_file(zip_path)
162
+ Zip::File.open(zip_path) do |zip_file|
163
+ zip_file.each do |entry|
164
+ entry_dest = File.join(cache_dir, entry.name)
165
+ FileUtils.mkdir_p(File.dirname(entry_dest))
166
+ zip_file.extract(entry, entry_dest) { true }
167
+ end
168
+ end
169
+ end
170
+
171
+ # Resolves the directory used for caching downloaded binaries.
172
+ #
173
+ # Uses the XDG cache home directory if available, otherwise defaults to `~/.cache/tahweel/poppler`.
174
+ #
175
+ # @return [String] Path to the cache directory.
176
+ def cache_dir
177
+ base = XDG.new.cache_home.to_s
178
+ base = File.join(Dir.home, ".cache") if base.empty?
179
+
180
+ dir = File.join(base, "tahweel", "poppler")
181
+ FileUtils.mkdir_p(dir)
182
+ dir
183
+ end
184
+ end
185
+ end
@@ -43,7 +43,7 @@ module Tahweel
43
43
 
44
44
  begin
45
45
  file_id = upload_file(file_path)
46
- download_text(file_id).gsub("\r\n", "\n").gsub("________________", "").strip
46
+ download_text(file_id).gsub("________________", "").strip
47
47
  ensure
48
48
  delete_file(file_id)
49
49
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Tahweel
4
- VERSION = "0.1.1"
4
+ VERSION = "0.1.2"
5
5
  end
@@ -1,8 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "writers/txt"
4
3
  require_relative "writers/docx"
5
4
  require_relative "writers/json"
5
+ require_relative "writers/txt"
6
6
 
7
7
  module Tahweel
8
8
  # Factory class for writing extracted text to different formats.
@@ -14,10 +14,11 @@ module Tahweel
14
14
  # Writes the extracted texts to a file.
15
15
  #
16
16
  # It applies several transformations to the text before writing:
17
- # 1. Normalizes line endings to `\n`.
17
+ # 1. Normalizes all line endings (`\r\n`, `\r`) to `\n`.
18
18
  # 2. Collapses consecutive identical whitespace characters.
19
19
  # 3. Compacts the text by merging short lines if the page is too long (> 40 lines).
20
20
  # 4. Determines text alignment (RTL/LTR) based on content.
21
+ # 5. Converts `\n` to proper OOXML line breaks for cross-platform compatibility.
21
22
  #
22
23
  # @param texts [Array<String>] The extracted texts (one per page).
23
24
  # @param destination [String] The output file path.
@@ -26,10 +27,10 @@ module Tahweel
26
27
  def write(texts, destination, options = {}) # rubocop:disable Lint/UnusedMethodArgument
27
28
  Caracal::Document.save(destination) do |docx|
28
29
  texts.each_with_index do |text, index|
29
- text = text.gsub(/(\r\n)+/, "\n").gsub(/(\s)\1+/, '\1').strip
30
+ text = text.gsub(/\r\n?/, "\n").gsub(/(\s)\1+/, '\1').strip
30
31
  text = compact_shortest_lines(text) while expected_lines_in_page(text) > 40
31
32
 
32
- docx.p text, size: 20, align: alignment_for(text)
33
+ write_paragraph(docx, text)
33
34
 
34
35
  docx.page if index < texts.size - 1
35
36
  end
@@ -38,6 +39,28 @@ module Tahweel
38
39
 
39
40
  private
40
41
 
42
+ # Writes a paragraph with proper OOXML line breaks.
43
+ #
44
+ # Raw newline characters (\n, \r\n) are not valid line breaks in DOCX format.
45
+ # Microsoft Word on Windows requires proper <w:br/> elements for line breaks,
46
+ # while macOS Pages is more lenient. This method uses Caracal's `br` method
47
+ # to insert cross-platform compatible line breaks.
48
+ #
49
+ # @param docx [Caracal::Document] The document to write to.
50
+ # @param text [String] The text content with newlines.
51
+ # @return [void]
52
+ def write_paragraph(docx, text)
53
+ lines = text.split("\n")
54
+ alignment = alignment_for(text)
55
+
56
+ docx.p align: alignment do
57
+ lines.each_with_index do |line, line_index|
58
+ text line, size: 20
59
+ br if line_index < lines.size - 1
60
+ end
61
+ end
62
+ end
63
+
41
64
  # Determines the text alignment based on the ratio of Arabic to non-Arabic characters.
42
65
  #
43
66
  # @param text [String] The text to analyze.
data/lib/tahweel.rb CHANGED
@@ -1,14 +1,19 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative "tahweel/version"
4
+ require_relative "tahweel/cli/options"
5
+ require_relative "tahweel/cli/file_processor"
6
+ require_relative "tahweel/cli/file_collector"
4
7
  require_relative "tahweel/authorizer"
8
+ require_relative "tahweel/poppler_installer"
5
9
  require_relative "tahweel/pdf_splitter"
6
10
  require_relative "tahweel/ocr"
11
+ require_relative "tahweel/processors/google_drive"
7
12
  require_relative "tahweel/converter"
8
13
  require_relative "tahweel/writer"
9
- require_relative "tahweel/cli/file_processor"
10
- require_relative "tahweel/cli/file_collector"
11
- require_relative "tahweel/cli/options"
14
+ require_relative "tahweel/writers/txt"
15
+ require_relative "tahweel/writers/docx"
16
+ require_relative "tahweel/writers/json"
12
17
 
13
18
  module Tahweel # rubocop:disable Style/Documentation
14
19
  class Error < StandardError; end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tahweel
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ali Hamdi Ali Fadel
@@ -23,6 +23,48 @@ dependencies:
23
23
  - - "~>"
24
24
  - !ruby/object:Gem::Version
25
25
  version: '1.4'
26
+ - !ruby/object:Gem::Dependency
27
+ name: csv
28
+ requirement: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - "~>"
31
+ - !ruby/object:Gem::Version
32
+ version: '3.3'
33
+ type: :runtime
34
+ prerelease: false
35
+ version_requirements: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - "~>"
38
+ - !ruby/object:Gem::Version
39
+ version: '3.3'
40
+ - !ruby/object:Gem::Dependency
41
+ name: fiddle
42
+ requirement: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - "~>"
45
+ - !ruby/object:Gem::Version
46
+ version: '1.1'
47
+ type: :runtime
48
+ prerelease: false
49
+ version_requirements: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - "~>"
52
+ - !ruby/object:Gem::Version
53
+ version: '1.1'
54
+ - !ruby/object:Gem::Dependency
55
+ name: glimmer-dsl-libui
56
+ requirement: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - "~>"
59
+ - !ruby/object:Gem::Version
60
+ version: 0.13.1
61
+ type: :runtime
62
+ prerelease: false
63
+ version_requirements: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - "~>"
66
+ - !ruby/object:Gem::Version
67
+ version: 0.13.1
26
68
  - !ruby/object:Gem::Dependency
27
69
  name: google-apis-drive_v3
28
70
  requirement: !ruby/object:Gem::Requirement
@@ -79,20 +121,6 @@ dependencies:
79
121
  - - "~>"
80
122
  - !ruby/object:Gem::Version
81
123
  version: '3.3'
82
- - !ruby/object:Gem::Dependency
83
- name: ruby-vips
84
- requirement: !ruby/object:Gem::Requirement
85
- requirements:
86
- - - "~>"
87
- - !ruby/object:Gem::Version
88
- version: '2.2'
89
- type: :runtime
90
- prerelease: false
91
- version_requirements: !ruby/object:Gem::Requirement
92
- requirements:
93
- - - "~>"
94
- - !ruby/object:Gem::Version
95
- version: '2.2'
96
124
  - !ruby/object:Gem::Dependency
97
125
  name: xdg
98
126
  requirement: !ruby/object:Gem::Requirement
@@ -114,6 +142,7 @@ email:
114
142
  executables:
115
143
  - tahweel
116
144
  - tahweel-clear
145
+ - tahweel-ui
117
146
  extensions: []
118
147
  extra_rdoc_files: []
119
148
  files:
@@ -124,8 +153,11 @@ files:
124
153
  - LICENSE.txt
125
154
  - README.md
126
155
  - Rakefile
156
+ - assets/logo.png
157
+ - assets/windows/tahweel.ico
127
158
  - bin/tahweel
128
159
  - bin/tahweel-clear
160
+ - bin/tahweel-ui
129
161
  - lib/tahweel.rb
130
162
  - lib/tahweel/authorizer.rb
131
163
  - lib/tahweel/cli/file_collector.rb
@@ -135,6 +167,7 @@ files:
135
167
  - lib/tahweel/converter.rb
136
168
  - lib/tahweel/ocr.rb
137
169
  - lib/tahweel/pdf_splitter.rb
170
+ - lib/tahweel/poppler_installer.rb
138
171
  - lib/tahweel/processors/google_drive.rb
139
172
  - lib/tahweel/templates/success.html
140
173
  - lib/tahweel/version.rb