tahweel 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.vscode/settings.json +1 -0
- data/CHANGELOG.md +13 -0
- data/README.en.md +468 -0
- data/README.md +464 -20
- data/assets/logo.png +0 -0
- data/assets/windows/tahweel.ico +0 -0
- data/bin/tahweel +10 -5
- data/bin/tahweel-ui +300 -0
- data/lib/tahweel/authorizer.rb +17 -7
- data/lib/tahweel/cli/file_processor.rb +14 -2
- data/lib/tahweel/cli/options.rb +24 -7
- data/lib/tahweel/cli/progress_renderer.rb +7 -0
- data/lib/tahweel/converter.rb +46 -9
- data/lib/tahweel/pdf_splitter.rb +92 -38
- data/lib/tahweel/poppler_installer.rb +185 -0
- data/lib/tahweel/processors/google_drive.rb +1 -1
- data/lib/tahweel/version.rb +1 -1
- data/lib/tahweel/writer.rb +1 -1
- data/lib/tahweel/writers/docx.rb +26 -3
- data/lib/tahweel.rb +8 -3
- data/website/favicon.ico +0 -0
- data/website/index.html +792 -0
- data/website/logo.png +0 -0
- data/website/privacy.html +489 -0
- metadata +53 -15
data/lib/tahweel/pdf_splitter.rb
CHANGED
|
@@ -1,14 +1,15 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require "etc"
|
|
3
4
|
require "fileutils"
|
|
4
|
-
require "rbconfig"
|
|
5
5
|
require "securerandom"
|
|
6
6
|
require "tmpdir"
|
|
7
|
-
|
|
7
|
+
|
|
8
|
+
require_relative "poppler_installer"
|
|
8
9
|
|
|
9
10
|
module Tahweel
|
|
10
11
|
# Handles the logic for splitting a PDF file into individual image pages.
|
|
11
|
-
# Uses
|
|
12
|
+
# Uses Poppler utils (pdftoppm, pdfinfo) for high-performance image processing.
|
|
12
13
|
class PdfSplitter
|
|
13
14
|
# Default DPI used when converting PDF pages to images.
|
|
14
15
|
# 150 DPI is a good balance between quality and file size for general documents.
|
|
@@ -25,7 +26,7 @@ module Tahweel
|
|
|
25
26
|
# percentage: Float,
|
|
26
27
|
# remaining_pages: Integer
|
|
27
28
|
# }
|
|
28
|
-
# @return [Hash] A hash containing the :folder_path (String) and :
|
|
29
|
+
# @return [Hash] A hash containing the :folder_path (String) and :images_paths (Array<String>).
|
|
29
30
|
def self.split(pdf_path, dpi: DEFAULT_DPI, &block) = new(pdf_path, dpi: dpi).split(&block)
|
|
30
31
|
|
|
31
32
|
# Initializes a new PdfSplitter instance.
|
|
@@ -35,13 +36,12 @@ module Tahweel
|
|
|
35
36
|
def initialize(pdf_path, dpi: DEFAULT_DPI)
|
|
36
37
|
@pdf_path = pdf_path
|
|
37
38
|
@dpi = dpi
|
|
38
|
-
@image_paths = []
|
|
39
39
|
end
|
|
40
40
|
|
|
41
41
|
# Executes the PDF splitting process.
|
|
42
42
|
#
|
|
43
43
|
# This method performs the following steps:
|
|
44
|
-
# 1. Checks if
|
|
44
|
+
# 1. Checks if Poppler utils are available (installs if missing on Windows).
|
|
45
45
|
# 2. Validates the existence of the source PDF file.
|
|
46
46
|
# 3. Creates a unique temporary directory for output.
|
|
47
47
|
# 4. Iterates through each page of the PDF and converts it to a PNG image.
|
|
@@ -55,12 +55,11 @@ module Tahweel
|
|
|
55
55
|
# }
|
|
56
56
|
# @return [Hash] Result hash with keys:
|
|
57
57
|
# - :folder_path [String] The absolute path to the temporary directory containing the images.
|
|
58
|
-
# - :
|
|
59
|
-
# @raise [RuntimeError] If the PDF file is not found
|
|
60
|
-
# @raise [Vips::Error] If the underlying VIPS library encounters an error during processing.
|
|
58
|
+
# - :images_paths [Array<String>] List of absolute paths for each generated image file.
|
|
59
|
+
# @raise [RuntimeError] If the PDF file is not found.
|
|
61
60
|
def split(&)
|
|
62
|
-
check_libvips_installed!
|
|
63
61
|
validate_file_exists!
|
|
62
|
+
PopplerInstaller.ensure_installed!
|
|
64
63
|
setup_output_directory
|
|
65
64
|
process_pages(&)
|
|
66
65
|
result
|
|
@@ -68,20 +67,7 @@ module Tahweel
|
|
|
68
67
|
|
|
69
68
|
private
|
|
70
69
|
|
|
71
|
-
attr_reader :pdf_path, :dpi, :
|
|
72
|
-
|
|
73
|
-
# Checks if the `vips` CLI tool is available in the system PATH.
|
|
74
|
-
# Skips this check on Windows systems, assuming the environment is managed differently.
|
|
75
|
-
# Aborts execution with an error message if vips is missing.
|
|
76
|
-
def check_libvips_installed!
|
|
77
|
-
return if /mswin|mingw|cygwin/.match?(RbConfig::CONFIG["host_os"])
|
|
78
|
-
return if system("vips --version", out: File::NULL, err: File::NULL)
|
|
79
|
-
|
|
80
|
-
abort "Error: libvips is not installed. Please install it before using Tahweel.\n" \
|
|
81
|
-
"MacOS: `brew install vips`\n" \
|
|
82
|
-
"Ubuntu: `sudo apt install libvips42`\n" \
|
|
83
|
-
"Windows: Already installed with the Tahweel gem"
|
|
84
|
-
end
|
|
70
|
+
attr_reader :pdf_path, :dpi, :output_dir
|
|
85
71
|
|
|
86
72
|
# Ensures the source PDF file actually exists.
|
|
87
73
|
# @raise [RuntimeError] if the file is missing.
|
|
@@ -106,33 +92,101 @@ module Tahweel
|
|
|
106
92
|
# }
|
|
107
93
|
# @return [void]
|
|
108
94
|
def process_pages(&)
|
|
109
|
-
|
|
110
|
-
|
|
95
|
+
mutex = Mutex.new
|
|
96
|
+
processed_count = 0
|
|
97
|
+
|
|
98
|
+
run_workers(build_queue, mutex) do
|
|
99
|
+
processed_count += 1
|
|
100
|
+
report_progress(processed_count, &)
|
|
101
|
+
end
|
|
102
|
+
end
|
|
103
|
+
|
|
104
|
+
# Builds a queue containing all page indices to be processed.
|
|
105
|
+
# @return [Queue] The queue populated with page numbers.
|
|
106
|
+
def build_queue
|
|
107
|
+
queue = Queue.new
|
|
108
|
+
total_pages.times { queue << _1 }
|
|
109
|
+
queue
|
|
110
|
+
end
|
|
111
111
|
|
|
112
|
-
|
|
112
|
+
# Spawns and manages worker threads to process the queue.
|
|
113
|
+
#
|
|
114
|
+
# @param queue [Queue] The queue of pages to process.
|
|
115
|
+
# @param mutex [Mutex] Synchronization primitive for thread safety.
|
|
116
|
+
# @param &block [Proc] Block to execute when a page is processed.
|
|
117
|
+
def run_workers(queue, mutex, &)
|
|
118
|
+
concurrency = (Etc.nprocessors - 2).clamp(2..)
|
|
119
|
+
|
|
120
|
+
Array.new([concurrency, total_pages].min) do
|
|
121
|
+
Thread.new { process_queue_items(queue, mutex, &) }
|
|
122
|
+
end.each(&:join)
|
|
123
|
+
end
|
|
113
124
|
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
125
|
+
# Processing loop for individual worker threads.
|
|
126
|
+
#
|
|
127
|
+
# @param queue [Queue] The shared queue of pages.
|
|
128
|
+
# @param mutex [Mutex] Synchronization primitive.
|
|
129
|
+
# @param &block [Proc] Block to yield for progress updates.
|
|
130
|
+
def process_queue_items(queue, mutex, &)
|
|
131
|
+
loop do
|
|
132
|
+
begin
|
|
133
|
+
page_num = queue.pop(true)
|
|
134
|
+
rescue ThreadError
|
|
135
|
+
break
|
|
136
|
+
end
|
|
137
|
+
|
|
138
|
+
extract_page(page_num)
|
|
139
|
+
|
|
140
|
+
mutex.synchronize(&)
|
|
120
141
|
end
|
|
121
142
|
end
|
|
122
143
|
|
|
144
|
+
# Reports progress back to the caller.
|
|
145
|
+
#
|
|
146
|
+
# @param processed [Integer] Number of pages processed so far.
|
|
147
|
+
# @param &block [Proc] The progress callback block.
|
|
148
|
+
def report_progress(processed, &)
|
|
149
|
+
return unless block_given?
|
|
150
|
+
|
|
151
|
+
yield({
|
|
152
|
+
file_path: @pdf_path, stage: :splitting,
|
|
153
|
+
current_page: processed,
|
|
154
|
+
percentage: ((processed.to_f / total_pages) * 100).round(2),
|
|
155
|
+
remaining_pages: total_pages - processed
|
|
156
|
+
})
|
|
157
|
+
end
|
|
158
|
+
|
|
123
159
|
# Calculates the total number of pages in the PDF by parsing the `Pages:` field of the `pdfinfo` output (memoized).
|
|
124
160
|
# @return [Integer] The page count.
|
|
125
161
|
def total_pages
|
|
126
|
-
@total_pages ||=
|
|
162
|
+
@total_pages ||= begin
|
|
163
|
+
output = `#{PopplerInstaller.pdfinfo_path} "#{pdf_path}"`.encode(
|
|
164
|
+
"UTF-8",
|
|
165
|
+
invalid: :replace, undef: :replace, replace: ""
|
|
166
|
+
)
|
|
167
|
+
|
|
168
|
+
pages = output[/Pages:\s*(\d+)/, 1]
|
|
169
|
+
raise "Failed to get page count from PDF: #{output}" unless pages
|
|
170
|
+
|
|
171
|
+
pages.to_i
|
|
172
|
+
end
|
|
127
173
|
end
|
|
128
174
|
|
|
129
175
|
# Extracts a specific page from the PDF and saves it as a PNG.
|
|
130
176
|
#
|
|
131
177
|
# @param page_num [Integer] The zero-based index of the page to extract.
|
|
132
178
|
def extract_page(page_num)
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
179
|
+
output_prefix = File.join(output_dir, "page")
|
|
180
|
+
|
|
181
|
+
system(
|
|
182
|
+
PopplerInstaller.pdftoppm_path,
|
|
183
|
+
"-png",
|
|
184
|
+
"-r", dpi.to_s,
|
|
185
|
+
"-f", (page_num + 1).to_s,
|
|
186
|
+
"-l", (page_num + 1).to_s,
|
|
187
|
+
pdf_path,
|
|
188
|
+
output_prefix
|
|
189
|
+
)
|
|
136
190
|
end
|
|
137
191
|
|
|
138
192
|
# Constructs the final result hash.
|
|
@@ -140,7 +194,7 @@ module Tahweel
|
|
|
140
194
|
def result
|
|
141
195
|
{
|
|
142
196
|
folder_path: output_dir,
|
|
143
|
-
|
|
197
|
+
images_paths: Dir.glob(File.join(output_dir, "page-*.png")).sort!
|
|
144
198
|
}
|
|
145
199
|
end
|
|
146
200
|
end
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "fileutils"
|
|
4
|
+
require "json"
|
|
5
|
+
require "net/http"
|
|
6
|
+
require "open-uri"
|
|
7
|
+
require "uri"
|
|
8
|
+
|
|
9
|
+
require "xdg"
|
|
10
|
+
require "zip"
|
|
11
|
+
|
|
12
|
+
module Tahweel
|
|
13
|
+
# Handles the installation and path resolution for Poppler utilities.
|
|
14
|
+
#
|
|
15
|
+
# On Windows, this class can automatically download and install the necessary
|
|
16
|
+
# binaries if they are not present. On other platforms, it provides instructions
|
|
17
|
+
# for manual installation.
|
|
18
|
+
class PopplerInstaller
|
|
19
|
+
POPPLER_REPO_API = "https://api.github.com/repos/oschwartz10612/poppler-windows/releases/latest"
|
|
20
|
+
|
|
21
|
+
# Ensures that Poppler utilities are installed.
|
|
22
|
+
#
|
|
23
|
+
# On Windows: Installs Poppler locally if not found.
|
|
24
|
+
# On other platforms: Aborts with an error message if Poppler is missing.
|
|
25
|
+
#
|
|
26
|
+
# @raise [SystemExit] if Poppler is missing on non-Windows platforms.
|
|
27
|
+
def self.ensure_installed! # rubocop:disable Metrics/MethodLength
|
|
28
|
+
installer = new
|
|
29
|
+
return if installer.installed?
|
|
30
|
+
|
|
31
|
+
if Gem.win_platform?
|
|
32
|
+
installer.install
|
|
33
|
+
else
|
|
34
|
+
abort <<~MSG
|
|
35
|
+
Error: Poppler utilities are not installed. Please install them:
|
|
36
|
+
MacOS: `brew install poppler`
|
|
37
|
+
Ubuntu: `sudo apt install poppler-utils`
|
|
38
|
+
MSG
|
|
39
|
+
end
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
# Returns the path to the `pdftoppm` executable.
|
|
43
|
+
# @return [String, nil] path to the executable, or nil when it is not in the PATH on a non-Windows platform.
|
|
44
|
+
def self.pdftoppm_path
  # A fresh installer instance resolves the path on every call; nothing is cached here.
  new.pdftoppm_path
end
|
|
45
|
+
|
|
46
|
+
# Returns the path to the `pdfinfo` executable.
|
|
47
|
+
# @return [String, nil] path to the executable, or nil when it is not in the PATH on a non-Windows platform.
|
|
48
|
+
def self.pdfinfo_path
  # A fresh installer instance resolves the path on every call; nothing is cached here.
  new.pdfinfo_path
end
|
|
49
|
+
|
|
50
|
+
# Installs Poppler binaries on Windows.
|
|
51
|
+
#
|
|
52
|
+
# Downloads the latest release from GitHub and extracts it to the cache directory.
|
|
53
|
+
# Does nothing if already installed.
|
|
54
|
+
def install
|
|
55
|
+
zip_path = nil
|
|
56
|
+
return if installed?
|
|
57
|
+
|
|
58
|
+
zip_path = download_release_file
|
|
59
|
+
extract_zip_file(zip_path)
|
|
60
|
+
ensure
|
|
61
|
+
FileUtils.rm_f(zip_path) if zip_path
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
# Checks if Poppler utilities are available.
|
|
65
|
+
#
|
|
66
|
+
# @return [Boolean] true if `pdftoppm` and `pdfinfo` are in the PATH or cached.
|
|
67
|
+
def installed? = (%w[pdftoppm pdfinfo].all? { command_exists?(_1) } || cached?)
|
|
68
|
+
|
|
69
|
+
# Checks if Poppler binaries are present in the local cache (Windows only).
|
|
70
|
+
#
|
|
71
|
+
# @return [Boolean] true if a cached `pdftoppm.exe` exists (only this binary is checked); always false off Windows.
|
|
72
|
+
def cached?
|
|
73
|
+
return false unless Gem.win_platform?
|
|
74
|
+
|
|
75
|
+
File.exist?(File.join(cached_bin_path, "pdftoppm.exe"))
|
|
76
|
+
end
|
|
77
|
+
|
|
78
|
+
# Resolves the path to the `pdftoppm` executable.
|
|
79
|
+
#
|
|
80
|
+
# Prioritizes the system PATH, falling back to the cached version on Windows.
|
|
81
|
+
#
|
|
82
|
+
# @return [String, nil] path to `pdftoppm`, or nil when it is not in the PATH on a non-Windows platform.
|
|
83
|
+
def pdftoppm_path
|
|
84
|
+
return "pdftoppm" if command_exists?("pdftoppm")
|
|
85
|
+
|
|
86
|
+
Gem.win_platform? ? File.join(cached_bin_path, "pdftoppm.exe") : nil
|
|
87
|
+
end
|
|
88
|
+
|
|
89
|
+
# Resolves the path to the `pdfinfo` executable.
|
|
90
|
+
#
|
|
91
|
+
# Prioritizes the system PATH, falling back to the cached version on Windows.
|
|
92
|
+
#
|
|
93
|
+
# @return [String, nil] path to `pdfinfo`, or nil when it is not in the PATH on a non-Windows platform.
|
|
94
|
+
def pdfinfo_path
|
|
95
|
+
return "pdfinfo" if command_exists?("pdfinfo")
|
|
96
|
+
|
|
97
|
+
Gem.win_platform? ? File.join(cached_bin_path, "pdfinfo.exe") : nil
|
|
98
|
+
end
|
|
99
|
+
|
|
100
|
+
private
|
|
101
|
+
|
|
102
|
+
# Locates the `bin` directory within the cached Poppler installation.
|
|
103
|
+
#
|
|
104
|
+
# Searches for a directory matching "poppler-*" in the cache directory and returns
|
|
105
|
+
# the path to its `Library/bin` subdirectory.
|
|
106
|
+
#
|
|
107
|
+
# @return [String] Path to the `bin` directory, or an empty string if not found.
|
|
108
|
+
def cached_bin_path
|
|
109
|
+
poppler_root = Dir.glob(File.join(cache_dir, "poppler-*")).first
|
|
110
|
+
return "" unless poppler_root
|
|
111
|
+
|
|
112
|
+
File.join(poppler_root, "Library", "bin")
|
|
113
|
+
end
|
|
114
|
+
|
|
115
|
+
# Checks if a command is available in the system path.
|
|
116
|
+
#
|
|
117
|
+
# @param cmd [String] The command to check for.
|
|
118
|
+
# @return [Boolean] true if the command exists in the PATH.
|
|
119
|
+
def command_exists?(cmd)
|
|
120
|
+
tool = Gem.win_platform? ? "where" : "which"
|
|
121
|
+
system("#{tool} #{cmd} > #{File::NULL} 2>&1")
|
|
122
|
+
end
|
|
123
|
+
|
|
124
|
+
# Downloads the latest Poppler release zip file.
|
|
125
|
+
#
|
|
126
|
+
# Fetches the download URL from the GitHub API and saves the file to the cache directory.
|
|
127
|
+
#
|
|
128
|
+
# @return [String] The local path to the downloaded zip file.
|
|
129
|
+
def download_release_file
|
|
130
|
+
release_url = latest_release_url
|
|
131
|
+
zip_path = File.join(cache_dir, File.basename(release_url))
|
|
132
|
+
URI.parse(release_url).open { File.binwrite(zip_path, _1.read) }
|
|
133
|
+
zip_path
|
|
134
|
+
end
|
|
135
|
+
|
|
136
|
+
# Retrieves the download URL for the latest Windows release of Poppler.
|
|
137
|
+
#
|
|
138
|
+
# Queries the GitHub API for the latest release and finds the asset matching "Release*.zip".
|
|
139
|
+
#
|
|
140
|
+
# @return [String] The download URL of the asset.
|
|
141
|
+
# @raise [SystemExit] if the API request fails or no valid asset is found.
|
|
142
|
+
def latest_release_url # rubocop:disable Metrics/AbcSize
|
|
143
|
+
uri = URI(POPPLER_REPO_API)
|
|
144
|
+
request = Net::HTTP::Get.new(uri)
|
|
145
|
+
request["User-Agent"] = "Tahweel-Gem"
|
|
146
|
+
|
|
147
|
+
response = Net::HTTP.start(uri.hostname, uri.port, use_ssl: true) { _1.request(request) }
|
|
148
|
+
|
|
149
|
+
unless response.is_a?(Net::HTTPSuccess)
|
|
150
|
+
abort "Failed to fetch Poppler release info: #{response.code} #{response.message}"
|
|
151
|
+
end
|
|
152
|
+
|
|
153
|
+
asset = JSON.parse(response.body)["assets"].find { _1["name"].match?(/^Release.*\.zip$/) }
|
|
154
|
+
|
|
155
|
+
asset ? asset["browser_download_url"] : abort("No valid Windows release found for Poppler.")
|
|
156
|
+
end
|
|
157
|
+
|
|
158
|
+
# Extracts the downloaded zip file to the cache directory.
|
|
159
|
+
#
|
|
160
|
+
# @param zip_path [String] Path to the zip file to extract.
|
|
161
|
+
def extract_zip_file(zip_path)
|
|
162
|
+
Zip::File.open(zip_path) do |zip_file|
|
|
163
|
+
zip_file.each do |entry|
|
|
164
|
+
entry_dest = File.join(cache_dir, entry.name)
|
|
165
|
+
FileUtils.mkdir_p(File.dirname(entry_dest))
|
|
166
|
+
zip_file.extract(entry, entry_dest) { true }
|
|
167
|
+
end
|
|
168
|
+
end
|
|
169
|
+
end
|
|
170
|
+
|
|
171
|
+
# Resolves the directory used for caching downloaded binaries.
|
|
172
|
+
#
|
|
173
|
+
# Uses `tahweel/poppler` under the XDG cache home, falling back to `~/.cache/tahweel/poppler` when the XDG cache home is empty. Creates the directory if it does not already exist.
|
|
174
|
+
#
|
|
175
|
+
# @return [String] Path to the cache directory.
|
|
176
|
+
def cache_dir
|
|
177
|
+
base = XDG.new.cache_home.to_s
|
|
178
|
+
base = File.join(Dir.home, ".cache") if base.empty?
|
|
179
|
+
|
|
180
|
+
dir = File.join(base, "tahweel", "poppler")
|
|
181
|
+
FileUtils.mkdir_p(dir)
|
|
182
|
+
dir
|
|
183
|
+
end
|
|
184
|
+
end
|
|
185
|
+
end
|
data/lib/tahweel/version.rb
CHANGED
data/lib/tahweel/writer.rb
CHANGED
data/lib/tahweel/writers/docx.rb
CHANGED
|
@@ -14,10 +14,11 @@ module Tahweel
|
|
|
14
14
|
# Writes the extracted texts to a file.
|
|
15
15
|
#
|
|
16
16
|
# It applies several transformations to the text before writing:
|
|
17
|
-
# 1. Normalizes line endings to `\n`.
|
|
17
|
+
# 1. Normalizes all line endings (`\r\n`, `\r`) to `\n`.
|
|
18
18
|
# 2. Collapses consecutive identical whitespace characters.
|
|
19
19
|
# 3. Compacts the text by merging short lines if the page is too long (> 40 lines).
|
|
20
20
|
# 4. Determines text alignment (RTL/LTR) based on content.
|
|
21
|
+
# 5. Converts `\n` to proper OOXML line breaks for cross-platform compatibility.
|
|
21
22
|
#
|
|
22
23
|
# @param texts [Array<String>] The extracted texts (one per page).
|
|
23
24
|
# @param destination [String] The output file path.
|
|
@@ -26,10 +27,10 @@ module Tahweel
|
|
|
26
27
|
def write(texts, destination, options = {}) # rubocop:disable Lint/UnusedMethodArgument
|
|
27
28
|
Caracal::Document.save(destination) do |docx|
|
|
28
29
|
texts.each_with_index do |text, index|
|
|
29
|
-
text = text.gsub(
|
|
30
|
+
text = text.gsub(/\r\n?/, "\n").gsub(/(\s)\1+/, '\1').strip
|
|
30
31
|
text = compact_shortest_lines(text) while expected_lines_in_page(text) > 40
|
|
31
32
|
|
|
32
|
-
docx
|
|
33
|
+
write_paragraph(docx, text)
|
|
33
34
|
|
|
34
35
|
docx.page if index < texts.size - 1
|
|
35
36
|
end
|
|
@@ -38,6 +39,28 @@ module Tahweel
|
|
|
38
39
|
|
|
39
40
|
private
|
|
40
41
|
|
|
42
|
+
# Writes a paragraph with proper OOXML line breaks.
|
|
43
|
+
#
|
|
44
|
+
# Raw newline characters (\n, \r\n) are not valid line breaks in DOCX format.
|
|
45
|
+
# Microsoft Word on Windows requires proper <w:br/> elements for line breaks,
|
|
46
|
+
# while macOS Pages is more lenient. This method uses Caracal's `br` method
|
|
47
|
+
# to insert cross-platform compatible line breaks.
|
|
48
|
+
#
|
|
49
|
+
# @param docx [Caracal::Document] The document to write to.
|
|
50
|
+
# @param text [String] The text content with newlines.
|
|
51
|
+
# @return [void]
|
|
52
|
+
def write_paragraph(docx, text)
|
|
53
|
+
lines = text.split("\n")
|
|
54
|
+
alignment = alignment_for(text)
|
|
55
|
+
|
|
56
|
+
docx.p align: alignment do
|
|
57
|
+
lines.each_with_index do |line, line_index|
|
|
58
|
+
text line, size: 20
|
|
59
|
+
br if line_index < lines.size - 1
|
|
60
|
+
end
|
|
61
|
+
end
|
|
62
|
+
end
|
|
63
|
+
|
|
41
64
|
# Determines the text alignment based on the ratio of Arabic to non-Arabic characters.
|
|
42
65
|
#
|
|
43
66
|
# @param text [String] The text to analyze.
|
data/lib/tahweel.rb
CHANGED
|
@@ -1,14 +1,19 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require_relative "tahweel/version"
|
|
4
|
+
require_relative "tahweel/cli/options"
|
|
5
|
+
require_relative "tahweel/cli/file_processor"
|
|
6
|
+
require_relative "tahweel/cli/file_collector"
|
|
4
7
|
require_relative "tahweel/authorizer"
|
|
8
|
+
require_relative "tahweel/poppler_installer"
|
|
5
9
|
require_relative "tahweel/pdf_splitter"
|
|
6
10
|
require_relative "tahweel/ocr"
|
|
11
|
+
require_relative "tahweel/processors/google_drive"
|
|
7
12
|
require_relative "tahweel/converter"
|
|
8
13
|
require_relative "tahweel/writer"
|
|
9
|
-
require_relative "tahweel/
|
|
10
|
-
require_relative "tahweel/
|
|
11
|
-
require_relative "tahweel/
|
|
14
|
+
require_relative "tahweel/writers/txt"
|
|
15
|
+
require_relative "tahweel/writers/docx"
|
|
16
|
+
require_relative "tahweel/writers/json"
|
|
12
17
|
|
|
13
18
|
module Tahweel # rubocop:disable Style/Documentation
|
|
14
19
|
# Error class defined in the Tahweel namespace; subclasses StandardError.
class Error < StandardError; end
|
data/website/favicon.ico
ADDED
|
Binary file
|