i18n-context-generator 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,49 @@
1
+ # frozen_string_literal: true
2
+
3
module I18nContextGenerator
  # File-based cache for LLM results, keyed by translation key and source context.
  #
  # Each entry is stored as one JSON file inside CACHE_DIR, which is a path
  # relative to the current working directory. The file name is an MD5 digest
  # of the cache version, the translation key, its text and the caller-supplied
  # context, so changing any of them produces a cache miss.
  #
  # Read and write failures are reported with +warn+ and otherwise ignored:
  # the cache is best-effort and never aborts an extraction run.
  class Cache
    CACHE_DIR = '.i18n-context-generator-cache'
    # Bump this when prompt format, search heuristics, or output schema change
    CACHE_VERSION = 'v2'

    # @param enabled [Boolean] when false, #get always misses and #set is a no-op
    def initialize(enabled: true)
      @enabled = enabled
      ensure_cache_dir if @enabled
    end

    # Look up a previously stored result.
    #
    # @param key [String] translation key
    # @param text [String] source text of the translation
    # @param context [Object, nil] extra data mixed into the cache key (interpolated as a string)
    # @return [Object, nil] the parsed JSON with symbolized keys, or nil on a
    #   miss, when the cache is disabled, or when the entry cannot be read
    def get(key, text, context: nil)
      return nil unless @enabled

      path = cache_path(key, text, context)
      return nil unless File.exist?(path)

      Oj.load_file(path, symbol_keys: true)
    rescue StandardError => e
      warn "Cache read error for #{key}: #{e.message}"
      nil
    end

    # Store a result for later lookups.
    #
    # @param key [String] translation key
    # @param text [String] source text of the translation
    # @param result [Object] JSON-serializable value to store
    # @param context [Object, nil] extra data mixed into the cache key
    # @return [void]
    def set(key, text, result, context: nil)
      return unless @enabled

      # The directory may have been removed by #clear (or externally) since
      # this instance was created; re-create it so writes keep working.
      ensure_cache_dir
      File.write(cache_path(key, text, context), Oj.dump(result, indent: 2, mode: :compat))
    rescue StandardError => e
      warn "Cache write error for #{key}: #{e.message}"
    end

    # Remove the cache directory and every entry in it.
    # Runs regardless of whether this instance is enabled.
    def clear
      # rm_rf already ignores a missing directory.
      FileUtils.rm_rf(CACHE_DIR)
    end

    private

    # Create the cache directory if it does not exist yet (idempotent).
    def ensure_cache_dir
      FileUtils.mkdir_p(CACHE_DIR)
    end

    def cache_path(key, text, context)
      # Include version, key, text, and context (match locations/code) in hash
      # so cache invalidates when source code usage changes
      hash = Digest::MD5.hexdigest("#{CACHE_VERSION}:#{key}:#{text}:#{context}")
      File.join(CACHE_DIR, "#{hash}.json")
    end
  end
end
@@ -0,0 +1,223 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'thor'
4
+
5
module I18nContextGenerator
  # Thor-based CLI entry point for the i18n-context-generator command.
  #
  # Commands: +extract+ (the default task), +init+ and +version+.
  # Validation failures are printed to stderr and terminate with exit status 1;
  # an interrupt during +extract+ exits with status 130.
  class CLI < Thor
    # Make Thor exit with a non-zero status when a command fails.
    def self.exit_on_failure?
      true
    end

    desc 'extract', 'Extract translation context from source code'
    long_desc <<~DESC
      Analyzes source code to extract contextual information for translation keys.
      Uses AI to understand how strings are used in the UI and generates descriptions
      to help translators produce better translations.

      Examples:
        # iOS app
        i18n-context-generator extract -t ios/Localizable.strings -s ios/

        # Android app
        i18n-context-generator extract -t android/res/values/strings.xml -s android/app/

        # Write context back to source files
        i18n-context-generator extract -t Localizable.strings -s . --write-back

        # Use config file
        i18n-context-generator extract --config .i18n-context-generator.yml
    DESC
    option :config, aliases: '-c', desc: 'Path to config file (.i18n-context-generator.yml)'
    option :translations, aliases: '-t', desc: 'Translation file(s), comma-separated'
    option :source, aliases: '-s', desc: 'Source directory(ies) to search, comma-separated'
    option :output, aliases: '-o', desc: 'Output file path (CSV written only if specified)'
    option :format, aliases: '-f', enum: %w[csv json], desc: 'Output format (default: csv)'
    option :provider, aliases: '-p', enum: %w[anthropic openai], desc: 'LLM provider (default: anthropic)'
    option :model, aliases: '-m', desc: 'LLM model to use'
    option :keys, aliases: '-k', desc: 'Filter keys (comma-separated patterns, supports * wildcard)'
    option :concurrency, type: :numeric, desc: 'Number of concurrent requests (default: 5)'
    option :dry_run, type: :boolean, desc: 'Show what would be processed without calling LLM'
    option :cache, type: :boolean, desc: 'Enable caching of LLM results'
    option :write_back, type: :boolean,
                        desc: 'Write context back to source translation files (.strings, strings.xml)'
    option :write_back_to_code, type: :boolean,
                                desc: 'Write context back to Swift source code comment: parameters'
    option :diff_base, type: :string, desc: 'Only process keys changed since this git ref (e.g., main, origin/main)'
    option :context_prefix, type: :string,
                            desc: 'Prefix for context comments (default: "Context: ", use empty string for none)'
    option :context_mode, type: :string, enum: %w[replace append],
                          desc: 'How to handle existing comments: replace or append (default: replace)'
    option :start_key, type: :string, desc: 'Start processing from this key (inclusive)'
    option :end_key, type: :string, desc: 'Stop processing at this key (inclusive)'
    option :include_file_paths, type: :boolean,
                                desc: 'Include full source file paths in LLM prompts (default: false)'
    option :include_translation_comments, type: :boolean,
                                          desc: 'Include translation file comments in LLM prompts (default: true)'
    option :redact_prompts, type: :boolean,
                            desc: 'Redact likely secrets and PII from LLM prompts (default: true)'

    # Validate the options, build the Config, run the extractor, and exit
    # non-zero if the extractor recorded any errors.
    def extract
      validate_options!
      config = Config.load(options)
      validate_api_key!(provider: config.provider, dry_run: config.dry_run)
      validate_diff_base!(base_ref: config.diff_base) if config.diff_base
      extractor = ContextExtractor.new(config)
      extractor.run
      fail_if_extraction_errors!(extractor)
    rescue I18nContextGenerator::Error => e
      say_error "Error: #{e.message}"
      exit 1
    rescue Interrupt
      say "\nInterrupted"
      exit 130
    end

    desc 'init', 'Create a sample config file'
    option :force, type: :boolean, default: false, desc: 'Overwrite existing config'

    # Write the sample config into the current directory, refusing to
    # overwrite an existing file unless --force is given.
    def init
      config_path = '.i18n-context-generator.yml'

      if File.exist?(config_path) && !options[:force]
        say_error 'Config file already exists. Use --force to overwrite.'
        exit 1
      end

      File.write(config_path, sample_config)
      say "Created #{config_path}"
    end

    desc 'version', 'Show version'
    def version
      say "i18n-context-generator #{VERSION}"
    end

    default_task :extract

    private

    # Require either an existing config file or --translations.
    #
    # When --config points at a file that does not exist, say so explicitly:
    # as a warning if --translations lets the run continue with command-line
    # options only, otherwise as the reason for the failure.
    def validate_options!
      config_path = options[:config]
      return if config_path && File.exist?(config_path)

      if options[:translations]
        say_error "Warning: config file '#{config_path}' not found; using command-line options only" if config_path
        return
      end

      say_error "Error: config file '#{config_path}' not found" if config_path
      say_error 'Error: --translations (-t) is required unless using a config file'
      exit 1
    end

    # Ensure the API key environment variable for the provider is set and
    # non-blank. Skipped entirely for dry runs.
    #
    # @param provider [String, nil] falls back to --provider, then 'anthropic'
    # @param dry_run [Boolean, nil] falls back to --dry-run when nil
    def validate_api_key!(provider: nil, dry_run: nil)
      return if dry_run.nil? ? options[:dry_run] : dry_run

      provider ||= options[:provider] || 'anthropic'
      env_var = case provider
                when 'anthropic' then 'ANTHROPIC_API_KEY'
                when 'openai' then 'OPENAI_API_KEY'
                else "#{provider.upcase}_API_KEY"
                end

      # An exported-but-empty variable is as unusable as a missing one.
      return unless ENV[env_var].to_s.strip.empty?

      say_error "Error: #{env_var} environment variable is required for provider '#{provider}'"
      say_error "Set it with: export #{env_var}=your-api-key"
      exit 1
    end

    # Ensure git is usable here and that the requested base ref exists.
    def validate_diff_base!(base_ref: options[:diff_base])
      unless GitDiff.available?
        say_error 'Error: --diff-base requires a git repository'
        exit 1
      end

      git_diff = GitDiff.new(base_ref: base_ref)
      return if git_diff.base_ref_exists?

      say_error "Error: git ref '#{base_ref}' not found"
      say_error 'Try: origin/main, main, or a specific commit SHA'
      exit 1
    end

    # Print a message to stderr.
    def say_error(message)
      warn message
    end

    # Exit with status 1 when the extractor collected any errors.
    def fail_if_extraction_errors!(extractor)
      return unless extractor.errors.any?

      say_error "Completed with #{extractor.errors.size} extraction error(s)."
      exit 1
    end

    # Contents of the config file written by +init+.
    def sample_config
      <<~YAML
        # i18n-context-generator configuration
        # Extract translation context from mobile app source code

        # Translation files to process
        # Supported formats: .strings (iOS), strings.xml (Android), .json, .yml
        translations:
          # iOS example
          - path: ios/MyApp/Resources/Localizable.strings

          # Android example
          # - path: android/app/src/main/res/values/strings.xml

        # Source code directories to search
        source:
          paths:
            - ios/MyApp/
            # - android/app/src/main/java/
          ignore:
            - "**/Pods/**"
            - "**/build/**"
            - "**/*.generated.*"
            - "**/*Tests*"

        # LLM configuration
        llm:
          provider: anthropic
          model: claude-sonnet-4-6
          # API key is read from the matching provider env var
          # (ANTHROPIC_API_KEY or OPENAI_API_KEY)

        # Processing options
        processing:
          concurrency: 5
          context_lines: 15
          max_matches_per_key: 3

        # Output configuration
        output:
          format: csv
          path: translation-context.csv
          # Set to true to write context comments back to translation files (.strings, strings.xml)
          write_back: false
          # Set to true to write context back to Swift source code comment: parameters
          write_back_to_code: false
          # Prefix for context comments (use empty string for no prefix)
          # context_prefix: "Context: "
          # How to handle existing comments: "replace" or "append"
          # context_mode: replace

        # Swift-specific configuration for write_back_to_code
        swift:
          # Localization functions to update (default shown)
          functions:
            - NSLocalizedString
            - "String(localized:"
            - "Text("
            # Add custom functions like:
            # - "MyLocalizedString("

        # Prompt privacy controls
        privacy:
          # Include full source paths in prompts sent to the LLM (default: false)
          include_file_paths: false
          # Include translation file comments in prompts (default: true)
          include_translation_comments: true
          # Redact likely secrets, URLs, and emails before sending prompts (default: true)
          redact_prompts: true
      YAML
    end
  end
end
@@ -0,0 +1,211 @@
1
+ # frozen_string_literal: true
2
+
3
module I18nContextGenerator
  # Holds all configuration for an extraction run, loaded from YAML config files and/or CLI options.
  #
  # Build one with Config.load: when the options name an existing config file
  # it is read first and explicitly passed CLI options are merged over it;
  # otherwise the configuration comes from the CLI options alone.
  class Config
    attr_reader :translations, :source_paths, :ignore_patterns,
                :provider, :model, :concurrency, :context_lines,
                :max_matches_per_key, :output_path, :output_format,
                :no_cache, :dry_run, :key_filter, :write_back,
                :swift_functions, :write_back_to_code, :diff_base, :context_prefix,
                :context_mode, :start_key, :end_key, :include_file_paths,
                :include_translation_comments, :redact_prompts

    DEFAULT_CONTEXT_PREFIX = 'Context: '
    DEFAULT_CONTEXT_MODE = 'replace' # "replace" or "append"

    # Config attribute => CLI option name, for options that map one-to-one.
    # (The :cache option is inverted into :no_cache and handled separately.)
    CLI_OVERRIDES = {
      key_filter: :keys,
      output_path: :output,
      output_format: :format,
      provider: :provider,
      model: :model,
      concurrency: :concurrency,
      diff_base: :diff_base,
      context_prefix: :context_prefix,
      context_mode: :context_mode,
      start_key: :start_key,
      end_key: :end_key,
      dry_run: :dry_run,
      write_back: :write_back,
      write_back_to_code: :write_back_to_code,
      include_file_paths: :include_file_paths,
      include_translation_comments: :include_translation_comments,
      redact_prompts: :redact_prompts
    }.freeze

    # Top-level YAML "privacy" keys copied into attributes of the same name.
    PRIVACY_KEYS = %w[include_file_paths include_translation_comments redact_prompts].freeze

    # @param attrs [Hash] attribute values keyed by reader name; anything
    #   omitted gets the default shown below. Attributes whose valid values
    #   include false or an empty string are defaulted via +key?+ so those
    #   values are respected when passed explicitly.
    def initialize(**attrs)
      @translations = attrs[:translations] || []
      @source_paths = attrs[:source_paths] || ['.']
      @ignore_patterns = attrs[:ignore_patterns] || []
      @provider = attrs[:provider] || 'anthropic'
      @model = attrs[:model]
      @concurrency = attrs[:concurrency] || 5
      @context_lines = attrs[:context_lines] || 15
      @max_matches_per_key = attrs[:max_matches_per_key] || 3
      @output_path = attrs[:output_path]
      @output_format = attrs[:output_format] || 'csv'
      @no_cache = attrs.fetch(:no_cache, true)
      @dry_run = attrs[:dry_run] || false
      @key_filter = attrs[:key_filter]
      @write_back = attrs[:write_back] || false
      @write_back_to_code = attrs[:write_back_to_code] || false
      @swift_functions = attrs[:swift_functions] || default_swift_functions
      @diff_base = attrs[:diff_base]
      @context_prefix = attrs.fetch(:context_prefix, DEFAULT_CONTEXT_PREFIX)
      @context_mode = attrs[:context_mode] || DEFAULT_CONTEXT_MODE
      @start_key = attrs[:start_key]
      @end_key = attrs[:end_key]
      @include_file_paths = attrs.fetch(:include_file_paths, false)
      @include_translation_comments = attrs.fetch(:include_translation_comments, true)
      @redact_prompts = attrs.fetch(:redact_prompts, true)
    end

    # @return [Array<String>] Swift localization call prefixes used when none are configured
    def default_swift_functions
      %w[NSLocalizedString String(localized: Text(]
    end

    # Build a Config from CLI options, using the config file they name when it exists.
    #
    # @param options [Hash] CLI options (symbol-keyed access)
    # @return [Config]
    def self.load(options)
      if options[:config] && File.exist?(options[:config])
        from_file(options[:config]).merge_cli(options)
      else
        from_cli(options)
      end
    end

    # Build a Config from a YAML file.
    #
    # @param path [String] path to the YAML config file
    # @return [Config]
    def self.from_file(path)
      yaml = YAML.safe_load_file(path, permitted_classes: []) || {}

      attrs = {
        translations: parse_translations(yaml['translations']),
        # Array() lets a single scalar path/pattern stand in for a one-item list.
        source_paths: Array(yaml.dig('source', 'paths') || ['.']),
        ignore_patterns: Array(yaml.dig('source', 'ignore') || default_ignore_patterns),
        provider: yaml.dig('llm', 'provider') || 'anthropic',
        model: yaml.dig('llm', 'model'),
        concurrency: yaml.dig('processing', 'concurrency') || 5,
        context_lines: yaml.dig('processing', 'context_lines') || 15,
        max_matches_per_key: yaml.dig('processing', 'max_matches_per_key') || 3,
        output_path: yaml.dig('output', 'path'),
        output_format: yaml.dig('output', 'format') || 'csv',
        write_back: yaml.dig('output', 'write_back') || false,
        write_back_to_code: yaml.dig('output', 'write_back_to_code') || false,
        context_mode: yaml.dig('output', 'context_mode'),
        swift_functions: yaml.dig('swift', 'functions')
      }

      # Only pass context_prefix when explicitly set in YAML, so initialize default applies
      prefix = yaml.dig('output', 'context_prefix')
      attrs[:context_prefix] = prefix unless prefix.nil?

      # Same for the privacy flags: an explicit false must survive, absence must not.
      PRIVACY_KEYS.each do |name|
        value = yaml.dig('privacy', name)
        attrs[name.to_sym] = value unless value.nil?
      end

      new(**attrs)
    end

    # Build a Config from CLI options only.
    #
    # @param options [Hash] CLI options (symbol-keyed access)
    # @return [Config]
    def self.from_cli(options)
      attrs = {
        translations: options[:translations] ? split_list(options[:translations]) : [],
        source_paths: options[:source] ? split_list(options[:source]) : ['.'],
        ignore_patterns: default_ignore_patterns,
        provider: options[:provider] || 'anthropic',
        model: options[:model],
        concurrency: options[:concurrency] || 5,
        context_lines: 15,
        max_matches_per_key: 3,
        output_path: options[:output],
        output_format: options[:format] || 'csv',
        # Caching is opt-in: disabled unless --cache was passed as true.
        no_cache: !options[:cache],
        dry_run: options[:dry_run] || false,
        key_filter: options[:keys],
        write_back: options[:write_back] || false,
        write_back_to_code: options[:write_back_to_code] || false,
        diff_base: options[:diff_base],
        start_key: options[:start_key],
        end_key: options[:end_key]
      }

      # Only include if explicitly provided, so Config.new can apply its defaults
      attrs[:context_prefix] = options[:context_prefix] unless options[:context_prefix].nil?
      attrs[:context_mode] = options[:context_mode] if options[:context_mode]
      PRIVACY_KEYS.each do |name|
        value = options[name.to_sym]
        attrs[name.to_sym] = value unless value.nil?
      end

      new(**attrs)
    end

    # Merge CLI options over config-file values.
    # Only options explicitly passed by the user (non-nil) are merged.
    # Thor options without defaults are nil when not passed, so this
    # correctly preserves config-file values for unspecified flags.
    #
    # @param options [Hash] CLI options (symbol-keyed access)
    # @return [Config] self, mutated in place
    def merge_cli(options)
      @translations = self.class.split_list(options[:translations]) if options[:translations]
      @source_paths = self.class.split_list(options[:source]) if options[:source]
      @no_cache = !options[:cache] unless options[:cache].nil?
      apply_cli_overrides(options)
      self
    end

    # Normalize the YAML "translations" value into a list of paths.
    #
    # Accepts a list whose items are path strings or mappings with a "path"
    # key, and also a single string or mapping in place of a list. Entries
    # that yield no path (e.g. a mapping without "path") are dropped.
    #
    # @param translations [Array, String, Hash, nil]
    # @return [Array<String>]
    def self.parse_translations(translations)
      return [] unless translations

      # Not Array(): that would explode a single Hash into key/value pairs.
      entries = translations.is_a?(Array) ? translations : [translations]
      entries.filter_map { |entry| entry.is_a?(Hash) ? entry['path'] : entry }
    end

    # Split a comma-separated CLI value into trimmed, non-blank items.
    #
    # @param value [String]
    # @return [Array<String>]
    def self.split_list(value)
      value.to_s.split(',').map(&:strip).reject(&:empty?)
    end

    # @return [Array<String>] glob patterns ignored when no ignore list is configured
    def self.default_ignore_patterns
      [
        '**/node_modules/**',
        '**/vendor/**',
        '**/.git/**',
        '**/build/**',
        '**/dist/**',
        '**/*.min.js',
        '**/*.test.*',
        '**/*.spec.*',
        '**/Pods/**',
        '**/Carthage/**',
        '**/.build/**',
        '**/DerivedData/**',
        '**/*Tests.swift',
        '**/*Tests.kt',
        '**/*Test.java',
        '**/*Test.kt'
      ]
    end

    private

    # Copy every non-nil CLI option in CLI_OVERRIDES onto its attribute.
    # nil means "not passed"; false and '' are real values and are applied.
    def apply_cli_overrides(options)
      CLI_OVERRIDES.each do |attr_name, option_name|
        value = options[option_name]
        instance_variable_set(:"@#{attr_name}", value) unless value.nil?
      end
    end
  end
end