llm_translate 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,279 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'yaml'
4
+
5
module LlmTranslate
  # Reads translation settings from a YAML file and exposes them through
  # reader methods. Each reader falls back to a built-in default when the
  # key is absent; a few readers let command-line options win over the file.
  #
  # Construction fails with ConfigurationError when the file is missing,
  # is not valid YAML, is not shaped as nested mappings, or yields no API key.
  class Config
    # Top-level keys the readers traverse with Hash#dig; each must be a
    # mapping (or absent), otherwise dig would raise a TypeError later on.
    SECTIONS = %w[ai translation files logging error_handling performance output].freeze

    # Environment variable named in the "API key is required" message.
    API_KEY_ENV_VAR = 'LLM_TRANSLATE_API_KEY'

    # Matches ${VAR_NAME} placeholders inside configuration strings.
    ENV_PLACEHOLDER = /\$\{([^}]+)\}/.freeze

    private_constant :SECTIONS, :API_KEY_ENV_VAR, :ENV_PLACEHOLDER

    attr_reader :data, :cli_options

    # Convenience constructor; identical to Config.new.
    def self.load(config_path, cli_options = {})
      new(config_path, cli_options)
    end

    # @param config_path [String] path to the YAML configuration file
    # @param cli_options [Hash, nil] symbol-keyed command-line options
    #   (:prompt, :input, :output and :verbose are consulted)
    # @raise [ConfigurationError] when the file or its content is unusable
    def initialize(config_path, cli_options = {})
      @cli_options = cli_options || {}
      @data = load_config_file(config_path)
      apply_cli_overrides
      validate_config
    end

    # AI Configuration

    # API key from the file (with ${VAR} placeholders expanded). When the file
    # gives no usable key (missing, blank, or a placeholder whose variable is
    # unset) the LLM_TRANSLATE_API_KEY environment variable is used instead.
    def api_key
      configured = resolve_env_var(data.dig('ai', 'api_key'))
      return configured if usable_secret?(configured)

      ENV[API_KEY_ENV_VAR]
    end

    def ai_host
      resolve_env_var(data.dig('ai', 'host')) || 'https://aihubmix.com'
    end

    def ai_provider
      data.dig('ai', 'provider') || 'openai'
    end

    def ai_model
      data.dig('ai', 'model') || 'claude-3-7-sonnet-20250219'
    end

    def temperature
      data.dig('ai', 'temperature') || 0.3
    end

    def max_tokens
      data.dig('ai', 'max_tokens') || 4000
    end

    def retry_attempts
      data.dig('ai', 'retry_attempts') || 3
    end

    def retry_delay
      data.dig('ai', 'retry_delay') || 2
    end

    def timeout
      data.dig('ai', 'timeout') || 60
    end

    # Translation Configuration

    # Prompt template; the :prompt CLI option wins over the file, which wins
    # over the built-in template.
    def default_prompt
      cli_options[:prompt] || data.dig('translation', 'default_prompt') || default_translation_prompt
    end

    def target_language
      data.dig('translation', 'target_language') || 'zh-CN'
    end

    def source_language
      data.dig('translation', 'source_language') || 'auto'
    end

    # True unless the file sets the key to exactly false.
    def preserve_formatting?
      data.dig('translation', 'preserve_formatting') != false
    end

    # True only when the file sets the key to exactly true.
    def translate_code_comments?
      data.dig('translation', 'translate_code_comments') == true
    end

    def preserve_patterns
      data.dig('translation', 'preserve_patterns') || default_preserve_patterns
    end

    # File Configuration

    def input_directory
      cli_options[:input] || data.dig('files', 'input_directory') || './docs'
    end

    def output_directory
      cli_options[:output] || data.dig('files', 'output_directory') || './docs-translated'
    end

    def input_file
      data.dig('files', 'input_file')
    end

    def output_file
      data.dig('files', 'output_file')
    end

    # Single-file mode requires both an input file and an output file.
    def single_file_mode?
      !input_file.nil? && !output_file.nil?
    end

    def filename_strategy
      data.dig('files', 'filename_strategy') || 'suffix'
    end

    def filename_suffix
      data.dig('files', 'filename_suffix') || '.zh'
    end

    def include_patterns
      data.dig('files', 'include_patterns') || ['**/*.md', '**/*.markdown']
    end

    def exclude_patterns
      data.dig('files', 'exclude_patterns') || []
    end

    def preserve_directory_structure?
      data.dig('files', 'preserve_directory_structure') != false
    end

    def overwrite_policy
      data.dig('files', 'overwrite_policy') || 'ask'
    end

    def backup_directory
      data.dig('files', 'backup_directory') || './backups'
    end

    # Logging Configuration

    # The :verbose CLI option forces 'debug' regardless of the file.
    def log_level
      cli_options[:verbose] ? 'debug' : (data.dig('logging', 'level') || 'info')
    end

    def log_output
      data.dig('logging', 'output') || 'console'
    end

    def log_file_path
      data.dig('logging', 'file_path') || './logs/llm_translate.log'
    end

    def verbose_translation?
      cli_options[:verbose] || data.dig('logging', 'verbose_translation') == true
    end

    def error_log_path
      data.dig('logging', 'error_log_path') || './logs/errors.log'
    end

    # Error Handling Configuration

    def on_error
      data.dig('error_handling', 'on_error') || 'log_and_continue'
    end

    def max_consecutive_errors
      data.dig('error_handling', 'max_consecutive_errors') || 5
    end

    def retry_on_failure
      data.dig('error_handling', 'retry_on_failure') || 2
    end

    def generate_error_report?
      data.dig('error_handling', 'generate_error_report') != false
    end

    def error_report_path
      data.dig('error_handling', 'error_report_path') || './logs/error_report.md'
    end

    # @param error_count [Integer] number of errors seen so far
    # @return [Boolean] true when on_error is 'stop' or the count has reached
    #   max_consecutive_errors
    def should_stop_on_error?(error_count)
      on_error == 'stop' || error_count >= max_consecutive_errors
    end

    # Performance Configuration

    def concurrent_files
      data.dig('performance', 'concurrent_files') || 3
    end

    def batch_size
      data.dig('performance', 'batch_size') || 5
    end

    def request_interval
      data.dig('performance', 'request_interval') || 1
    end

    def max_memory_mb
      data.dig('performance', 'max_memory_mb') || 500
    end

    # Output Configuration

    def show_progress?
      data.dig('output', 'show_progress') != false
    end

    def show_statistics?
      data.dig('output', 'show_statistics') != false
    end

    def generate_report?
      data.dig('output', 'generate_report') != false
    end

    def report_path
      data.dig('output', 'report_path') || './reports/translation_report.md'
    end

    def output_format
      data.dig('output', 'format') || 'markdown'
    end

    def include_metadata?
      data.dig('output', 'include_metadata') != false
    end

    private

    # Parses the YAML file into a Hash (an empty document becomes {}).
    # Every Psych failure (syntax error, disallowed class, bad alias) is
    # reported as a ConfigurationError rather than leaking to the caller.
    def load_config_file(config_path)
      unless config_path && File.exist?(config_path)
        raise ConfigurationError, "Configuration file not found: #{config_path}"
      end

      parsed = begin
        YAML.safe_load(File.read(config_path)) || {}
      rescue Psych::Exception => e
        raise ConfigurationError, "Invalid YAML in configuration file: #{e.message}"
      end

      ensure_mappings!(parsed)
    end

    # Rejects documents the dig-based readers cannot traverse: a non-mapping
    # top level, or a known section holding a scalar or a list.
    def ensure_mappings!(parsed)
      unless parsed.is_a?(Hash)
        raise ConfigurationError, 'Configuration file must contain a YAML mapping at the top level'
      end

      SECTIONS.each do |section|
        value = parsed[section]
        next if value.nil? || value.is_a?(Hash)

        raise ConfigurationError, "Configuration section '#{section}' must be a mapping"
      end

      parsed
    end

    def apply_cli_overrides
      # CLI options take precedence over config file
      # This is already handled in the individual getter methods
    end

    def validate_config
      # to_s keeps the check safe when YAML parsed the key as a non-String scalar.
      if api_key.to_s.strip.empty?
        raise ConfigurationError,
              'API key is required. Set LLM_TRANSLATE_API_KEY environment variable or configure in config file.'
      end

      # Only the parent is checked here.
      input_parent = File.dirname(input_directory)
      return if Dir.exist?(input_parent)

      raise ConfigurationError, "Input directory parent does not exist: #{input_parent}"
    end

    # Replaces each ${VAR_NAME} with the value of that environment variable.
    # Placeholders whose variable is unset are left untouched; non-String
    # values are returned unchanged.
    def resolve_env_var(value)
      return value unless value.is_a?(String)

      value.gsub(ENV_PLACEHOLDER) do |placeholder|
        ENV[::Regexp.last_match(1)] || placeholder
      end
    end

    # A secret is usable when it is present, non-blank, and carries no
    # leftover ${VAR} placeholder (which would mean the variable was unset).
    def usable_secret?(value)
      text = value.to_s
      !text.strip.empty? && !text.match?(ENV_PLACEHOLDER)
    end

    def default_translation_prompt
      <<~PROMPT
        Please translate the following Markdown content to #{target_language}, keeping all formatting intact:
        - Preserve code blocks, links, images, and other Markdown syntax
        - Keep English technical terms and product names when appropriate
        - Ensure natural and fluent translation

        Content:
        {content}
      PROMPT
    end

    def default_preserve_patterns
      [
        '```[\\s\\S]*?```', # Code blocks
        '`[^`]+`', # Inline code
        '\\[.*?\\]\\(.*?\\)', # Links
        '!\\[.*?\\]\\(.*?\\)' # Images
      ]
    end
  end
end
@@ -0,0 +1,153 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'find'
4
+ require 'fileutils'
5
+ require 'pathname'
6
+
7
module LlmTranslate
  # Finds the files to translate under the configured input directory, maps
  # each one to its destination under the output directory, and applies the
  # configured overwrite policy when a destination already exists.
  class FileFinder
    # Flags used for every include/exclude glob comparison.
    FNMATCH_FLAGS = File::FNM_PATHNAME | File::FNM_DOTMATCH
    private_constant :FNMATCH_FLAGS

    attr_reader :config, :logger

    # @param config [#input_directory, #output_directory, #include_patterns,
    #   #exclude_patterns, #filename_strategy, #filename_suffix,
    #   #overwrite_policy, #backup_directory] settings source
    # @param logger [#debug, #info, #warn] log sink
    def initialize(config, logger)
      @config = config
      @logger = logger
    end

    # Walks the input directory and collects readable files whose path
    # (relative to the input directory) matches an include pattern and no
    # exclude pattern.
    #
    # @return [Array<String>] matching paths, sorted
    # @raise [FileError] when the input directory does not exist
    def find_markdown_files
      input_dir = config.input_directory

      raise FileError, "Input directory does not exist: #{input_dir}" unless Dir.exist?(input_dir)

      logger.debug "Scanning directory: #{input_dir}"
      logger.debug "Include patterns: #{config.include_patterns}"
      logger.debug "Exclude patterns: #{config.exclude_patterns}"

      root = Pathname.new(input_dir)
      files = Find.find(input_dir).select do |path|
        next false if File.directory?(path)

        relative_path = Pathname.new(path).relative_path_from(root).to_s
        next false unless matches_include_patterns?(relative_path)
        next false if matches_exclude_patterns?(relative_path)

        unless File.readable?(path)
          logger.warn "Skipping unreadable file: #{path}"
          next false
        end

        logger.debug "Found markdown file: #{relative_path}"
        true
      end

      logger.info "Found #{files.length} markdown files"
      files.sort
    end

    # Maps an input file to its destination: same relative directory under
    # the output directory, with the filename strategy applied to the name.
    #
    # @param input_path [String, Pathname] a path inside the input directory
    # @return [Pathname]
    def output_path_for(input_path)
      input_dir = Pathname.new(config.input_directory)
      output_dir = Pathname.new(config.output_directory)

      relative_path = Pathname.new(input_path).relative_path_from(input_dir)
      output_filename = apply_filename_strategy(relative_path.basename.to_s)

      output_dir + (relative_path.dirname + output_filename)
    end

    # Creates the parent directory of output_path when it is missing.
    def ensure_output_directory(output_path)
      output_dir = File.dirname(output_path)
      return if Dir.exist?(output_dir)

      logger.debug "Creating output directory: #{output_dir}"
      FileUtils.mkdir_p(output_dir)
    end

    # Decides whether an already-existing destination should be left alone.
    # 'skip' skips; 'overwrite' and unknown policies proceed; 'backup' copies
    # the existing file aside and proceeds; 'ask' prompts on the terminal.
    #
    # @return [Boolean] true when the file must not be (re)written
    def should_skip_file?(_input_path, output_path)
      return false unless File.exist?(output_path)

      case config.overwrite_policy
      when 'skip'
        logger.info "Skipping existing file: #{output_path}"
        true
      when 'backup'
        create_backup(output_path)
        false
      when 'ask'
        ask_user_permission(output_path)
      else
        # 'overwrite' and any unrecognised policy
        false
      end
    end

    private

    # An empty include list means "include everything".
    def matches_include_patterns?(file_path)
      patterns = config.include_patterns
      patterns.empty? || matches_any?(patterns, file_path)
    end

    # An empty exclude list means "exclude nothing".
    def matches_exclude_patterns?(file_path)
      matches_any?(config.exclude_patterns, file_path)
    end

    def matches_any?(patterns, file_path)
      patterns.any? { |pattern| File.fnmatch?(pattern, file_path, FNMATCH_FLAGS) }
    end

    # 'replace' swaps the extension for the suffix, 'directory' keeps the
    # name, and 'suffix' (also the fallback for unknown strategies) inserts
    # the suffix before the extension.
    def apply_filename_strategy(filename)
      stem = File.basename(filename, '.*')

      case config.filename_strategy
      when 'replace'
        "#{stem}#{config.filename_suffix}"
      when 'directory'
        filename
      else
        "#{stem}#{config.filename_suffix}#{File.extname(filename)}"
      end
    end

    # Copies file_path into the backup directory under a timestamped name.
    def create_backup(file_path)
      backup_dir = config.backup_directory
      FileUtils.mkdir_p(backup_dir)

      timestamp = Time.now.strftime('%Y%m%d_%H%M%S')
      backup_path = unused_backup_path(backup_dir, "#{File.basename(file_path)}.#{timestamp}")

      FileUtils.cp(file_path, backup_path)
      logger.info "Created backup: #{backup_path}"
    end

    # Returns "<stem>.bak", or "<stem>.<n>.bak" when that name is taken, so
    # same-named files backed up within one second do not overwrite each other.
    def unused_backup_path(backup_dir, stem)
      candidate = File.join(backup_dir, "#{stem}.bak")
      attempt = 0
      candidate = File.join(backup_dir, "#{stem}.#{attempt += 1}.bak") while File.exist?(candidate)
      candidate
    end

    # Prompts on stdout and reads the answer from stdin.
    #
    # @return [Boolean] true (skip) unless the answer is "y" or "yes"
    def ask_user_permission(file_path)
      print "File exists: #{file_path}. Overwrite? (y/N): "
      $stdout.flush # make sure the prompt is visible before blocking on input

      # gets returns nil at end of input (closed or redirected stdin);
      # to_s turns that into the default answer "no" instead of a crash.
      response = $stdin.gets.to_s.strip.downcase
      !%w[y yes].include?(response)
    end
  end
end
@@ -0,0 +1,170 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'logger'
4
+ require 'fileutils'
5
+
6
module LlmTranslate
  # Application logger. Wraps a main standard-library logger (console, file,
  # or both, chosen by config.log_output) plus a separate error log that
  # receives every error and fatal message a second time.
  class Logger
    # Maps the configured level name to the standard-library severity.
    SEVERITIES = {
      'debug' => ::Logger::DEBUG,
      'info' => ::Logger::INFO,
      'warn' => ::Logger::WARN,
      'error' => ::Logger::ERROR,
      'fatal' => ::Logger::FATAL
    }.freeze

    # ANSI colour prefix per severity label for console output.
    CONSOLE_COLORS = {
      'DEBUG' => "\e[36m", # Cyan
      'INFO' => "\e[32m", # Green
      'WARN' => "\e[33m", # Yellow
      'ERROR' => "\e[31m", # Red
      'FATAL' => "\e[35m" # Magenta
    }.freeze

    COLOR_RESET = "\e[0m"

    # Line format shared by the file log and the error log.
    FILE_FORMATTER = proc do |severity, datetime, _progname, msg|
      "[#{datetime.strftime('%Y-%m-%d %H:%M:%S')}] #{severity}: #{msg}\n"
    end

    private_constant :SEVERITIES, :CONSOLE_COLORS, :COLOR_RESET, :FILE_FORMATTER

    attr_reader :config, :logger, :error_logger

    # @param config [#log_output, #log_file_path, #error_log_path, #log_level,
    #   #verbose_translation?] settings source
    def initialize(config)
      @config = config
      @logger = create_main_logger
      @error_logger = create_error_logger
    end

    def debug(message)
      logger.debug(format_message(message))
    end

    def info(message)
      logger.info(format_message(message))
    end

    def warn(message)
      logger.warn(format_message(message))
    end

    # Logs to the main logger and, when one exists, to the error log.
    def error(message)
      text = format_message(message)
      logger.error(text)
      error_logger&.error(text)
    end

    # Logs to the main logger and, when one exists, to the error log.
    def fatal(message)
      text = format_message(message)
      logger.fatal(text)
      error_logger&.fatal(text)
    end

    # The four helpers below are silent unless verbose translation is on.

    def log_translation_start(file_path)
      info("Starting translation: #{file_path}") if config.verbose_translation?
    end

    def log_translation_complete(file_path, output_path)
      info("Translation complete: #{file_path} -> #{output_path}") if config.verbose_translation?
    end

    def log_ai_request(prompt_length, model)
      debug("AI Request - Model: #{model}, Prompt length: #{prompt_length} chars") if config.verbose_translation?
    end

    def log_ai_response(response_length, tokens_used = nil)
      return unless config.verbose_translation?

      suffix = tokens_used ? ", Tokens: #{tokens_used}" : ''
      debug("AI Response - Length: #{response_length} chars#{suffix}")
    end

    private

    # Anything other than 'file' or 'both' yields a console logger.
    def create_main_logger
      case config.log_output
      when 'file' then create_file_logger(config.log_file_path)
      when 'both' then create_multi_logger
      else create_console_logger
      end
    end

    # The error log is an ordinary file logger pointed at error_log_path.
    def create_error_logger
      path = config.error_log_path
      return nil unless path

      create_file_logger(path)
    end

    def create_console_logger
      ::Logger.new($stdout).tap do |console|
        console.level = log_level_constant
        console.formatter = proc do |severity, datetime, _progname, msg|
          tint = CONSOLE_COLORS.fetch(severity, COLOR_RESET)
          "#{tint}[#{datetime.strftime('%H:%M:%S')}] #{severity}:#{COLOR_RESET} #{msg}\n"
        end
      end
    end

    # Creates the parent directory first so the log file can be opened.
    def create_file_logger(file_path)
      FileUtils.mkdir_p(File.dirname(file_path))

      ::Logger.new(file_path).tap do |file_log|
        file_log.level = log_level_constant
        file_log.formatter = FILE_FORMATTER
      end
    end

    def create_multi_logger
      MultiLogger.new([create_console_logger, create_file_logger(config.log_file_path)])
    end

    # Unrecognised level names fall back to INFO.
    def log_level_constant
      SEVERITIES.fetch(config.log_level.downcase, ::Logger::INFO)
    end

    # Hashes and arrays are rendered with inspect; everything else with to_s.
    def format_message(message)
      structured = message.is_a?(Hash) || message.is_a?(Array)
      structured ? message.inspect : message.to_s
    end
  end

  # Helper class for logging to multiple destinations
  class MultiLogger
    # @param loggers [Array<#debug, #info, #warn, #error, #fatal, #level=>]
    def initialize(loggers)
      @loggers = loggers
    end

    # One forwarding method per severity.
    %w[debug info warn error fatal].each do |severity|
      define_method(severity) do |message|
        @loggers.each { |target| target.send(severity, message) }
      end
    end

    # Applies the same threshold to every wrapped logger.
    def level=(level)
      @loggers.each { |target| target.level = level }
    end
  end
end