i18n-context-generator 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,260 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'json'
4
+ require 'net/http'
5
+
6
+ module I18nContextGenerator
7
+ module LLM
8
# Immutable value object holding the outcome of one LLM context request.
# Only +description+ is required; all other members default to nil.
ContextResult = Data.define(:description, :ui_element, :tone, :max_length, :error) do
  def initialize(description:, ui_element: nil, tone: nil, max_length: nil, error: nil)
    super(description: description, ui_element: ui_element, tone: tone,
          max_length: max_length, error: error)
  end
end
14
+
15
+ # Base class for LLM clients
16
+ class Client
17
+ SYSTEM_PROMPT = 'You are a mobile app localization expert. Analyze only the provided evidence and provide concise, specific context for translators. Respond with only valid JSON.'
18
+
19
# Builds the client object for the requested LLM provider.
#
# @param provider [#to_s] provider name, compared case-insensitively
# @return [Object] a new Anthropic or OpenAI instance
# @raise [Error] for 'ollama' (not implemented yet) and for any unknown name
def self.for(provider)
  provider_name = provider.to_s.downcase
  return Anthropic.new if provider_name == 'anthropic'
  return OpenAI.new if provider_name == 'openai'
  raise Error, 'Ollama provider not yet implemented' if provider_name == 'ollama'

  raise Error, "Unknown LLM provider: #{provider}"
end
31
+
32
# Abstract hook: concrete provider clients override this method.
# The base implementation accepts the full keyword set and always raises.
#
# @raise [NotImplementedError] always
def generate_context(key:, text:, matches:, model: nil, comment: nil,
                     include_file_paths: false, redact_prompts: true)
  message = 'Subclasses must implement #generate_context'
  raise NotImplementedError, message
end
36
+
37
+ protected
38
+
39
# Assembles the user prompt sent to the LLM for one translation key.
#
# The prompt contains the key, the original text, an optional developer
# comment section, an optional placeholder summary, the formatted code
# matches, and the task/quality instructions plus the JSON response shape.
#
# @param key [String] translation key; interpolated as-is (not sanitized)
# @param text [String] source text; sanitized for display when redact_prompts is true
# @param matches [Array] code matches, passed to detect_platform and format_matches
# @param comment [String, nil] developer comment; the section is omitted when nil or blank
# @param include_file_paths [Boolean] forwarded to format_matches
# @param redact_prompts [Boolean] forwarded to sanitize_prompt_text and format_matches
# @return [String] the complete prompt text
def build_prompt(key:, text:, matches:, comment: nil,
                 include_file_paths: false, redact_prompts: true)
  platform = detect_platform(matches)
  safe_text = sanitize_prompt_text(text, redact: redact_prompts)
  safe_comment = sanitize_prompt_text(comment, redact: redact_prompts)
  # Placeholder detection runs on the raw text, not the sanitized copy.
  placeholder_info = detect_placeholders(text)

  <<~PROMPT
    You are analyzing a localized string from a #{platform} mobile app to help translators understand its context.

    ## Translation Key
    `#{key}`

    ## Original Text
    "#{safe_text}"
    #{"\n## Developer Comment\n\"#{safe_comment}\"\n" if safe_comment && !safe_comment.strip.empty?}#{"\n## Format Placeholders\n#{placeholder_info}\n" if placeholder_info}
    ## Code Usage
    #{format_matches(matches, include_file_paths: include_file_paths, redact_prompts: redact_prompts)}

    ## Task
    Analyze how this string is used in the mobile app code and provide context for translators.

    **IMPORTANT - Avoid False Positives:**
    - Look for ACTUAL UI USAGE, not coincidental code patterns
    - Ignore method calls that happen to match the key (e.g., `.apply()`, `.close()`, `.clear()` are methods, not UI strings)
    - Ignore boolean/string comparisons (e.g., `if value == "yes"` is not UI usage)
    - Ignore analytics event names or tracking parameters
    - Focus on localization patterns: getString(), NSLocalizedString(), Text(), @string/, R.string., etc.
    - If no clear UI usage is found in the code, base your description only on the provided text, developer comment, and key name
    - If evidence is limited, keep the description generic rather than inventing a specific screen, flow, or user action

    Focus on:
    1. **Where it appears**: What screen or view displays this text?
    2. **UI element type**: Is it a button label, navigation title, alert message, placeholder, etc.?
    3. **User action**: What action triggers this text or what happens when the user interacts with it?
    4. **Constraints**: Are there any length constraints (e.g., button width, navigation bar)?

    Write a concise context description (1-2 sentences) that helps a translator understand:
    - The purpose of this text in the app
    - The UI context where it appears
    - Any important considerations for translation

    **Quality Guidelines:**
    - Be SPECIFIC about WHERE and HOW the text is used, not just what it means
    - Avoid vague descriptions like "used throughout the app" - identify specific screens/features
    - If the text is a common UI term (Save, Cancel, OK), describe its specific usage context in THIS app
    - Do not speculate or hedge. Never use words like "likely", "probably", "appears", "seems", "may", or "might"
    - Only mention screens, features, or actions when they are supported by the provided code, comment, text, or key name
    - Only set `max_length` when there is explicit evidence for a concrete numeric limit; otherwise return null
    - Do not infer `max_length` from general UI conventions like buttons, badges, placeholders, or navigation bars
    - Don't mention code implementation details - focus on the user-facing experience

    Respond with ONLY a JSON object (no markdown, no explanation):
    {
    "description": "Concise context for translators (1-2 sentences)",
    "ui_element": "button|label|title|alert|toast|placeholder|navigation|menu|tab|error|confirmation|other",
    "tone": "formal|casual|urgent|friendly|technical|neutral",
    "max_length": null or a number only when explicit evidence gives a concrete numeric limit
    }
  PROMPT
end
100
+
101
# Guesses the platform label used in the prompt from the matched files'
# extensions. iOS extensions win over Android ones when both are present.
#
# @param matches [Array<#file>] code matches exposing a file path
# @return [String] 'iOS', 'Android', or the generic 'mobile'
def detect_platform(matches)
  return 'mobile' if matches.empty?

  suffixes = matches.map { |match| File.extname(match.file).downcase }
  return 'iOS' unless (suffixes & %w[.swift .m .mm]).empty?
  return 'Android' unless (suffixes & %w[.kt .java]).empty?

  'mobile'
end
114
+
115
# Renders every code match as a markdown section: a "### Match N" heading
# with location, line and optional enclosing scope, followed by the
# (optionally redacted) snippet inside a fenced block.
#
# @param matches [Array] objects responding to file, line, enclosing_scope, context
# @param include_file_paths [Boolean] show the full path instead of the basename
# @param redact_prompts [Boolean] forwarded to sanitize_prompt_text
# @return [String] sections separated by a blank line ("" for no matches)
def format_matches(matches, include_file_paths:, redact_prompts:)
  sections = matches.each_with_index.map do |match, index|
    location = include_file_paths ? match.file : File.basename(match.file)
    scope_suffix = match.enclosing_scope ? " (in #{match.enclosing_scope})" : ''
    snippet = sanitize_prompt_text(match.context, redact: redact_prompts)

    "### Match #{index + 1}: #{location}:#{match.line}#{scope_suffix}\n" \
      "```\n#{snippet}\n```\n"
  end

  sections.join("\n")
end
129
+
130
# Masks sensitive-looking substrings before text is placed in a prompt.
# Rules are applied in order: URLs, e-mail addresses, Bearer tokens,
# quoted secret assignments (double then single quotes), JWT-shaped
# tokens, and hex runs of 32 or more characters.
#
# @param text [String, nil] text to clean
# @param redact [Boolean] when false the text is returned untouched
# @return [String, nil] redacted copy, or the input itself when nil / not redacting
def sanitize_prompt_text(text, redact:)
  return text unless redact && !text.nil?

  rules = [
    [%r{https?://\S+}i, '[REDACTED_URL]'],
    [/\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}\b/i, '[REDACTED_EMAIL]'],
    [%r{Bearer\s+[A-Za-z0-9\-._~+/]+=*}i, 'Bearer [REDACTED_TOKEN]'],
    [/((?:api[_-]?key|access[_-]?token|refresh[_-]?token|secret|password)\s*[:=]\s*)"[^"]*"/i,
     '\1"[REDACTED_SECRET]"'],
    [/((?:api[_-]?key|access[_-]?token|refresh[_-]?token|secret|password)\s*[:=]\s*)'[^']*'/i,
     "\\1'[REDACTED_SECRET]'"],
    [/\beyJ[A-Za-z0-9\-_]+(?:\.[A-Za-z0-9\-_]+){2}\b/, '[REDACTED_TOKEN]'],
    [/\b(?!\h{8}-\h{4}-\h{4}-\h{4}-\h{12}\b)[A-Fa-f0-9]{32,}\b/, '[REDACTED_TOKEN]']
  ]

  rules.reduce(text) { |current, (pattern, replacement)| current.gsub(pattern, replacement) }
end
144
+
145
# Summarizes printf-style format placeholders found in a source string so
# the prompt can tell the model they must survive translation.
#
# The text is scanned once. Each placeholder is classified by its final
# conversion character. Unsigned integers (%u, %lu) are reported as
# numbers, like %d and %i.
#
# NOTE(review): the flag set includes a space, so prose such as "20% done"
# matches "% d" as a placeholder — confirm whether that is intended.
#
# @param text [String, nil] source text (nil yields nil instead of raising)
# @return [String, nil] multi-line summary, or nil when nothing was found
def detect_placeholders(text)
  return nil if text.nil?

  # iOS: %@, %d, %f, %ld, %lld, %1$@, %2$d, etc.
  # Android: %s, %d, %f, %1$s, %2$d, etc.
  found = text.to_s.scan(/%(?:\d+\$)?[#0 +'.-]*\d*(?:\.\d+)?(?:l{0,2}|h{0,2})?[diouxXeEfFgGaAcsSpn@]/)
  return nil if found.empty?

  bullets = found.map do |placeholder|
    # The conversion character is always the last character of the match.
    hint = case placeholder[-1]
           when 'd', 'i', 'u' then 'a number'
           when 'f', 'F', 'e', 'E', 'g', 'G', 'a', 'A' then 'a decimal number'
           when '@', 's', 'S' then 'a string value'
           else 'a value'
           end
    "- #{placeholder} — #{hint}"
  end

  "This string contains #{found.size} placeholder(s) that must be preserved in translation:\n" \
    "#{bullets.join("\n")}"
end
167
+
168
# Converts the raw model reply into a ContextResult.
#
# - nil/empty reply: failure result with error 'Empty response'
# - no JSON object found: the stripped reply becomes the description
# - JSON found: description/ui_element/tone/max_length are read from it
# - JSON that fails to parse: stripped reply plus a 'JSON parse error' message
#
# @param text [String, nil] raw text returned by the provider
# @return [ContextResult]
def parse_response(text)
  blank = text.nil? || text.empty?
  return ContextResult.new(description: 'Failed to parse response', error: 'Empty response') if blank

  payload = extract_json(text)
  # No JSON object in the reply: surface the raw text instead.
  return ContextResult.new(description: text.strip, error: nil) unless payload

  fields = JSON.parse(payload, symbolize_names: true)
  description, ui_element, tone, max_length =
    fields.values_at(:description, :ui_element, :tone, :max_length)

  ContextResult.new(
    description: description || 'No description provided',
    ui_element: ui_element,
    tone: tone,
    max_length: max_length
  )
rescue JSON::ParserError => e
  ContextResult.new(description: text.strip, error: "JSON parse error: #{e.message}")
end
189
+
190
# Pulls a JSON object out of a model reply.
#
# A markdown-fenced object (``` or ```json) is returned as-is, without
# validation. Otherwise the search starts at the first '{' and tries every
# '}' from the end of the text backwards, returning the first slice that
# parses as JSON.
#
# @param text [String] raw reply
# @return [String, nil] the JSON text, or nil when none is found
def extract_json(text)
  fenced = text.include?('```') && text.match(/```(?:json)?\s*(\{[^`]+\})\s*```/m)
  return fenced[1] if fenced

  opening = text.index('{')
  return nil if opening.nil?

  # Longest candidate first: walk closing-brace positions from the end.
  (text.length - 1).downto(opening) do |closing|
    next if text[closing] != '}'

    candidate = text[opening..closing]
    begin
      JSON.parse(candidate) # validation only; the parsed value is discarded
    rescue JSON::ParserError
      next
    end
    return candidate
  end

  nil
end
216
+
217
# Sends +body+ as a JSON POST to +uri+ over the session from http_for.
#
# @param uri [URI] target endpoint
# @param headers [Hash] extra headers; they override the default Content-Type on conflict
# @param body [Object] payload serialized with JSON.generate
# @param open_timeout [Numeric] connection timeout passed to http_for
# @param read_timeout [Numeric] read timeout passed to http_for
# @return [Object] whatever the session's #request returns
def post_json(uri:, headers:, body:, open_timeout: 10, read_timeout: 60)
  connection = http_for(uri, open_timeout: open_timeout, read_timeout: read_timeout)

  all_headers = { 'Content-Type' => 'application/json' }.merge(headers)
  post = Net::HTTP::Post.new(uri.request_uri, all_headers)
  post.body = JSON.generate(body)

  connection.request(post)
end
228
+
229
# Returns a started Net::HTTP session for the URI's scheme/host/port,
# cached in a thread variable of the current thread. A cached session that
# is still started is reused after its timeouts are refreshed; otherwise a
# new session is opened and stored.
#
# @param uri [URI] endpoint whose scheme, host and port identify the session
# @param open_timeout [Numeric] connect timeout applied to the session
# @param read_timeout [Numeric] read timeout applied to the session
# @return [Net::HTTP] started session
def http_for(uri, open_timeout:, read_timeout:)
  endpoint = [uri.scheme, uri.host, uri.port]
  pool = Thread.current.thread_variable_get(http_sessions_key) || {}
  cached = pool[endpoint]

  if cached&.started?
    cached.open_timeout = open_timeout
    cached.read_timeout = read_timeout
    return cached
  end

  fresh = Net::HTTP.new(uri.host, uri.port).tap do |session|
    session.use_ssl = uri.scheme == 'https'
    session.open_timeout = open_timeout
    session.read_timeout = read_timeout
    session.keep_alive_timeout = 30
    session.start
  end

  pool[endpoint] = fresh
  Thread.current.thread_variable_set(http_sessions_key, pool)
  fresh
end
254
+
255
# Name of the thread variable that stores this client's HTTP sessions.
# The name embeds the client's object_id and is memoized after first use.
#
# @return [Symbol]
def http_sessions_key
  return @http_sessions_key if @http_sessions_key

  @http_sessions_key = "i18n_context_generator_http_sessions_#{object_id}".to_sym
end
258
+ end
259
+ end
260
+ end
@@ -0,0 +1,112 @@
1
+ # frozen_string_literal: true
2
+
3
+ module I18nContextGenerator
4
+ module LLM
5
+ # OpenAI Responses API implementation of the LLM client.
6
+ class OpenAI < Client
7
+ API_URL = 'https://api.openai.com/v1/responses'
8
+ DEFAULT_MODEL = 'gpt-5-mini'
9
+ MAX_RETRIES = 2
10
+ RESPONSE_SCHEMA = {
11
+ type: 'object',
12
+ additionalProperties: false,
13
+ required: %w[description ui_element tone max_length],
14
+ properties: {
15
+ description: { type: 'string' },
16
+ ui_element: { type: %w[string null], enum: %w[button label title alert toast placeholder navigation menu tab error confirmation other] + [nil] },
17
+ tone: { type: %w[string null], enum: %w[formal casual urgent friendly technical neutral] + [nil] },
18
+ max_length: { type: %w[integer null] }
19
+ }
20
+ }.freeze
21
+
22
# Reads the API key from the OPENAI_API_KEY environment variable and
# prepares the endpoint URI.
#
# @raise [Error] when OPENAI_API_KEY is not set
def initialize
  super
  @api_key = ENV['OPENAI_API_KEY']
  raise Error, 'OPENAI_API_KEY environment variable is required' if @api_key.nil?

  @uri = URI(API_URL)
end
29
+
30
# Requests translator context for one string from the OpenAI Responses API.
#
# A 429 reply is retried up to MAX_RETRIES times. The wait is the
# Retry-After value (seconds) multiplied by the attempt number. When the
# header is missing, non-numeric (e.g. an HTTP-date) or not positive, a
# 2-second base is used so a retry never fires with a zero delay.
#
# @param key [String] translation key
# @param text [String] source text
# @param matches [Array] code matches forwarded to build_prompt
# @param model [String, nil] model name; DEFAULT_MODEL when nil
# @param comment [String, nil] developer comment
# @param include_file_paths [Boolean] forwarded to build_prompt
# @param redact_prompts [Boolean] forwarded to build_prompt
# @return [ContextResult] parsed result; any StandardError becomes an
#   'API request failed' result carrying the exception message
def generate_context(key:, text:, matches:, model: nil, comment: nil,
                     include_file_paths: false, redact_prompts: true)
  model ||= DEFAULT_MODEL
  prompt = build_prompt(
    key: key,
    text: text,
    matches: matches,
    comment: comment,
    include_file_paths: include_file_paths,
    redact_prompts: redact_prompts
  )
  retries = 0

  loop do
    response = post_request(model: model, prompt: prompt)
    return handle_response(response) unless response.code.to_i == 429 && retries < MAX_RETRIES

    retries += 1
    header = response['retry-after']
    # Float(..., exception: false) yields nil for dates and other non-numeric values.
    hinted = header && Float(header, exception: false)
    base_delay = hinted&.positive? ? hinted : 2
    sleep(base_delay * retries)
  end
rescue StandardError => e
  ContextResult.new(description: 'API request failed', error: e.message)
end
58
+
59
+ private
60
+
61
# Posts one Responses API request with the structured-output schema.
#
# @param model [String] model identifier
# @param prompt [String] user prompt sent as +input+
# @return [Object] the value returned by post_json
def post_request(model:, prompt:)
  output_format = {
    type: 'json_schema',
    name: 'translation_context',
    strict: true,
    schema: RESPONSE_SCHEMA
  }
  payload = {
    model: model,
    store: false,
    instructions: SYSTEM_PROMPT,
    input: prompt,
    max_output_tokens: 500,
    text: { format: output_format }
  }
  auth_headers = { 'Authorization' => "Bearer #{@api_key}" }

  post_json(uri: @uri, headers: auth_headers, body: payload)
end
84
+
85
# Maps an HTTP response to a ContextResult.
#
# 200 is parsed through extract_output_text/parse_response. 429 and 401
# get fixed messages. Any other status reports the message found in the
# error body, or "HTTP <code>" when none can be extracted.
#
# @param response [#code, #body] HTTP response
# @return [ContextResult]
def handle_response(response)
  case response.code.to_i
  when 200
    body = JSON.parse(response.body)
    parse_response(extract_output_text(body))
  when 429
    ContextResult.new(description: 'Rate limited', error: 'Rate limit exceeded - try reducing concurrency')
  when 401
    ContextResult.new(description: 'Authentication failed', error: 'Invalid API key')
  else
    ContextResult.new(description: 'API error', error: api_error_message(response))
  end
end

# Extracts a human-readable message from an error response body.
# Accepts {"error": {"message": "..."}}, a flat {"error": "..."} and
# {"message": "..."}; anything else (non-JSON, arrays, blank messages)
# falls back to "HTTP <code>" instead of raising.
def api_error_message(response)
  fallback = "HTTP #{response.code}"
  parsed = begin
    JSON.parse(response.body.to_s)
  rescue JSON::ParserError
    nil
  end
  return fallback unless parsed.is_a?(Hash)

  error = parsed['error']
  message = error.is_a?(Hash) ? error['message'] : error
  message = (message || parsed['message']).to_s
  message.empty? ? fallback : message
end
104
+
105
# Returns the text of the first 'output_text' content part inside the
# first 'message' item of the response's +output+ list.
#
# @param body [Hash] parsed response body
# @return [String, nil] the text, or nil when no such part exists
def extract_output_text(body)
  message = Array(body['output']).find { |entry| entry['type'] == 'message' }
  parts = Array(message && message['content'])
  text_part = parts.find { |part| part['type'] == 'output_text' }

  text_part ? text_part['text'] : nil
end
110
+ end
111
+ end
112
+ end
@@ -0,0 +1,110 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rexml/document'
4
+
5
+ module I18nContextGenerator
6
+ module Parsers
7
+ # Parser for Android strings.xml files
8
+ # Format: <string name="key">value</string>
9
+ # With optional comment: <!-- comment --> <string name="key">value</string>
10
+ class AndroidXmlParser < Base
11
# Reads an Android resources XML file and returns one TranslationEntry per
# translatable <string>, per <string-array> item (key "name[index]") and
# per <plurals> item (key "name:quantity"). Elements marked
# translatable="false" are skipped.
#
# @param path [String] path to the XML file (read as UTF-8)
# @return [Array<TranslationEntry>] strings first, then arrays, then plurals
def parse(path)
  document = REXML::Document.new(File.read(path, encoding: 'UTF-8'))
  results = []

  # Plain strings carry the XML comment found directly before them, if any.
  document.elements.each('resources/string') do |node|
    if translatable?(node)
      results << TranslationEntry.new(
        key: node.attributes['name'],
        text: unescape_android_string(inner_text(node)),
        source_file: path,
        metadata: { comment: find_preceding_comment(node) }
      )
    end
  end

  # String arrays: one entry per <item>, keyed by position.
  document.elements.each('resources/string-array') do |array_node|
    if translatable?(array_node)
      name = array_node.attributes['name']
      array_node.elements.to_a('item').each.with_index do |item, position|
        results << TranslationEntry.new(
          key: "#{name}[#{position}]",
          text: unescape_android_string(inner_text(item)),
          source_file: path,
          metadata: { array: name, index: position }
        )
      end
    end
  end

  # Plurals: one entry per quantity.
  document.elements.each('resources/plurals') do |plural_node|
    if translatable?(plural_node)
      name = plural_node.attributes['name']
      plural_node.elements.each('item') do |item|
        amount = item.attributes['quantity']
        results << TranslationEntry.new(
          key: "#{name}:#{amount}",
          text: unescape_android_string(inner_text(item)),
          source_file: path,
          metadata: { plural: name, quantity: amount }
        )
      end
    end
  end

  results
end
66
+
67
+ private
68
+
69
# Concatenates everything inside an element: text nodes contribute their
# value, any other child (inline markup such as <b>, <i>, <u>, <xliff:g>)
# contributes its serialized form. REXML::Element#text alone would stop at
# the first text node.
#
# @param element [REXML::Element]
# @return [String]
def inner_text(element)
  pieces = element.children.map do |node|
    case node
    when REXML::Text then node.value
    else node.to_s
    end
  end

  pieces.join
end
77
+
78
# Walks backwards over the element's previous siblings and returns the
# first XML comment found, stripped. The walk stops with nil as soon as
# another element is reached, so a comment only applies to the element
# that directly follows it.
#
# @param element [REXML::Element]
# @return [String, nil]
def find_preceding_comment(element)
  node = element.previous_sibling

  until node.nil?
    return node.to_s.strip if node.is_a?(REXML::Comment)
    # Hit another element first: the comment (if any) is not ours.
    return nil if node.is_a?(REXML::Element)

    node = node.previous_sibling
  end

  nil
end
93
+
94
# An element is translatable unless its +translatable+ attribute equals
# "false" (compared case-insensitively). A missing attribute counts as
# translatable.
#
# @param element [#attributes]
# @return [Boolean]
def translatable?(element)
  flag = element.attributes['translatable']
  flag.nil? || flag.downcase != 'false'
end
97
+
98
# Unescapes Android string-resource escapes in a single left-to-right pass.
#
# Handled sequences: \' \" \@ \? \\ (the character itself), \n (newline)
# and \t (tab). Doing it in one pass means an escaped backslash is consumed
# together with its partner, so "\\n" becomes a literal backslash followed
# by "n" rather than a backslash plus a newline. Unknown escapes and a
# trailing lone backslash are left untouched.
#
# @param str [String] raw resource text
# @return [String] unescaped copy
def unescape_android_string(str)
  str.gsub(/\\(['"nt@?\\])/) do
    escaped = Regexp.last_match(1)
    case escaped
    when 'n' then "\n"
    when 't' then "\t"
    else escaped
    end
  end
end
108
+ end
109
+ end
110
+ end
@@ -0,0 +1,60 @@
1
+ # frozen_string_literal: true
2
+
3
+ module I18nContextGenerator
4
+ module Parsers
5
# Immutable record for one translatable string: its key, text, the file it
# came from, and optional parser-specific metadata (empty hash by default).
TranslationEntry = Data.define(:key, :text, :source_file, :metadata) do
  def initialize(key:, text:, source_file:, metadata: {})
    super(key: key, text: text, source_file: source_file, metadata: metadata)
  end
end
11
+
12
+ # Base class for translation file parsers
13
+ class Base
14
# Picks the parser for a translation file based on its extension.
# XML is accepted only when the file is named strings.xml
# (case-insensitive) or the path contains '/res/values'.
#
# @param path [String] translation file path
# @return [Base] a new parser instance
# @raise [Error] for unsupported XML files and unknown extensions
def self.for(path)
  extension = File.extname(path).downcase

  if extension == '.json'
    JsonParser.new
  elsif %w[.yml .yaml].include?(extension)
    YamlParser.new
  elsif extension == '.strings'
    StringsParser.new
  elsif extension == '.xml'
    android = File.basename(path).downcase == 'strings.xml' || path.include?('/res/values')
    raise Error, "Unsupported XML format: #{path} (only Android strings.xml is supported)" unless android

    AndroidXmlParser.new
  else
    raise Error, "Unsupported translation file format: #{path}"
  end
end
35
+
36
# Abstract hook: concrete parsers override this method.
#
# @param path [String] translation file path (unused here)
# @raise [NotImplementedError] always
def parse(path)
  message = 'Subclasses must implement #parse'
  raise NotImplementedError, message
end
39
+
40
+ protected
41
+
42
# Collapses a nested hash into a single level with dot-joined keys:
# {"a" => {"b" => "c"}} becomes {"a.b" => "c"}. Array values are joined
# with ' | '; every other value is kept as-is.
#
# @param hash [Hash] possibly nested input
# @param prefix [String, nil] key path accumulated by the recursion
# @return [Hash{String => Object}]
def flatten_keys(hash, prefix = nil)
  flat = {}

  hash.each do |key, value|
    path = [prefix, key].compact.join('.')

    if value.is_a?(Hash)
      flat.merge!(flatten_keys(value, path))
    elsif value.is_a?(Array)
      # Handle arrays (e.g., pluralization)
      flat[path] = value.join(' | ')
    else
      flat[path] = value
    end
  end

  flat
end
58
+ end
59
+ end
60
+ end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ module I18nContextGenerator
4
+ module Parsers
5
+ # Parses flat or nested JSON translation files into TranslationEntry objects.
6
+ class JsonParser < Base
7
# Loads a JSON translation file with Oj, flattens nested keys, and builds
# one entry per key whose value is neither nil nor blank. Values are
# stored as strings.
#
# @param path [String] JSON file path
# @return [Array<TranslationEntry>]
def parse(path)
  flattened = flatten_keys(Oj.load_file(path))
  usable = flattened.reject { |_key, text| text.nil? || text.to_s.strip.empty? }

  usable.map do |key, text|
    TranslationEntry.new(key: key, text: text.to_s, source_file: path)
  end
end
19
+ end
20
+ end
21
+ end
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
+ module I18nContextGenerator
4
+ module Parsers
5
+ # Parser for Apple .strings files (iOS/macOS)
6
+ # Uses the dotstrings gem for proper parsing with support for:
7
+ # - Multi-line comments
8
+ # - Unicode and escaped characters
9
+ # - Proper error handling
10
+ class StringsParser < Base
11
# Parses an Apple .strings file through the dotstrings gem and returns one
# entry per item, keeping the item's comment in the metadata.
#
# @param path [String] .strings file path
# @return [Array<TranslationEntry>]
# @raise [Error] when dotstrings reports a parsing error
def parse(path)
  # strict: false asks dotstrings to be lenient with edge cases
  parsed = DotStrings.parse_file(path, strict: false)

  parsed.items.each_with_object([]) do |item, entries|
    entries << TranslationEntry.new(
      key: item.key,
      text: item.value,
      source_file: path,
      metadata: { comment: item.comment }
    )
  end
rescue DotStrings::ParsingError => e
  raise Error, "Failed to parse .strings file #{path}: #{e.message}"
end
26
+ end
27
+ end
28
+ end
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
+ module I18nContextGenerator
4
+ module Parsers
5
+ # Parses YAML translation files (including Rails i18n style) into TranslationEntry objects.
6
+ class YamlParser < Base
7
# Loads a YAML translation file, drops a single Rails-style top-level
# locale key when present, flattens nested keys, and builds one entry per
# key whose value is neither nil nor blank.
#
# Documents that do not load as a Hash (an empty file, a list, a bare
# scalar) have no keys to translate and yield an empty list. The locale
# check stringifies the top-level key first, because YAML may load it as a
# non-String (for example `no:` becomes false, `1:` becomes an Integer).
#
# @param path [String] YAML file path
# @return [Array<TranslationEntry>]
def parse(path)
  data = YAML.safe_load_file(path, permitted_classes: [])
  return [] unless data.is_a?(Hash)

  # Skip top-level locale key if present (Rails i18n style)
  # e.g., { "en" => { "hello" => "Hello" } } -> { "hello" => "Hello" }
  if data.size == 1 && data.values.first.is_a?(Hash)
    locale_key = data.keys.first
    # Only skip if it looks like a locale code: "xx" or "xx-YY"
    data = data.values.first if locale_key.to_s.match?(/\A[a-z]{2}(-[A-Z]{2})?\z/i)
  end

  flatten_keys(data).filter_map do |key, text|
    next if text.nil? || text.to_s.strip.empty?

    TranslationEntry.new(
      key: key,
      text: text.to_s,
      source_file: path
    )
  end
end
28
+ end
29
+ end
30
+ end