better_translate 0.3.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +13 -0
- data/README.md +24 -0
- data/lib/better_translate/similarity_analyzer.rb +151 -0
- data/lib/better_translate/version.rb +1 -1
- data/lib/better_translate.rb +1 -0
- data/lib/generators/better_translate/analyze_generator.rb +43 -0
- metadata +3 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6941ef1c334c2ddbbb7141c8257f8906957fd0b60eadeee867d9a11b83788ae6
|
4
|
+
data.tar.gz: 8678aa23790a06d905592efc1571a91408e58a9f1359e70ee90174d5d2ee8975
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 548433e5223f37320a42c7ba35644b46470dc89baf42581612753d0542d039e2729cad8dd8bf4fbde1cd962ac970f397fdd1a3d98374f83fbc0380805771daae
|
7
|
+
data.tar.gz: 704eaeb04b49d7f165631443567e89afdff27a30c42893d0d04e424fe340fb113a9a4553ded29c1b4f8e9af36d75eccea0e138d0056c72442f75570340b9fa97
|
data/CHANGELOG.md
CHANGED
@@ -5,6 +5,19 @@ All notable changes to BetterTranslate will be documented in this file.
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
7
7
|
|
8
|
+
## [0.4.0] - 2025-03-11
|
9
|
+
|
10
|
+
### Added
|
11
|
+
- New Translation Similarity Analyzer:
|
12
|
+
- Identifies similar translations across language files
|
13
|
+
- Generates detailed JSON reports and human-readable summaries
|
14
|
+
- Uses Levenshtein distance for similarity calculation
|
15
|
+
- Configurable similarity threshold
|
16
|
+
- New Rails generator: `rails generate better_translate:analyze`
|
17
|
+
- Analyzes all YAML files in the locales directory
|
18
|
+
- Provides immediate feedback in the console
|
19
|
+
- Generates comprehensive similarity reports
|
20
|
+
|
8
21
|
## [0.3.1] - 2025-03-11
|
9
22
|
|
10
23
|
### Added
|
data/README.md
CHANGED
@@ -14,6 +14,7 @@ BetterTranslate simplifies the translation of YAML files in your Ruby/Rails appl
|
|
14
14
|
- 📊 Real-time progress tracking
|
15
15
|
- 🧪 Comprehensive test coverage
|
16
16
|
- ⚡️ LRU caching for performance
|
17
|
+
- 🔍 Translation similarity analysis
|
17
18
|
|
18
19
|
## Why BetterTranslate? 🤔
|
19
20
|
|
@@ -30,6 +31,10 @@ BetterTranslate simplifies the translation of YAML files in your Ruby/Rails appl
|
|
30
31
|
- Comprehensive test suite
|
31
32
|
- LRU caching for performance
|
32
33
|
- Progress tracking
|
34
|
+
- 🔍 **Translation Analysis**:
|
35
|
+
- Similarity detection
|
36
|
+
- Detailed reports
|
37
|
+
- Optimization suggestions
|
33
38
|
- 🔧 **Flexible Helpers**:
|
34
39
|
- Single text translation
|
35
40
|
- Bulk text translation
|
@@ -146,6 +151,25 @@ The gem includes generators to simplify tasks:
|
|
146
151
|
|
147
152
|
The `better_translate:translate` generator will trigger the translation process (via `BetterTranslate.magic`) and display progress in the terminal.
|
148
153
|
|
154
|
+
- **Analyze Translations:**
|
155
|
+
|
156
|
+
```bash
|
157
|
+
rails generate better_translate:analyze
|
158
|
+
```
|
159
|
+
|
160
|
+
The `better_translate:analyze` generator will:
|
161
|
+
- Scan all YAML files in your locales directory
|
162
|
+
- Find similar translations using Levenshtein distance
|
163
|
+
- Generate two reports:
|
164
|
+
- `translation_similarities.json`: Detailed JSON report
|
165
|
+
- `translation_similarities_summary.txt`: Human-readable summary
|
166
|
+
|
167
|
+
This helps you:
|
168
|
+
- Identify potentially redundant translations
|
169
|
+
- Maintain consistency across your translations
|
170
|
+
- Optimize your translation files
|
171
|
+
- Reduce translation costs
|
172
|
+
|
149
173
|
### Translation Helpers
|
150
174
|
|
151
175
|
BetterTranslate provides helper methods to simplify translation tasks.
|
@@ -0,0 +1,151 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "yaml"
|
4
|
+
require "set"
|
5
|
+
require "json"
|
6
|
+
require "time"
|
7
|
+
|
8
|
+
module BetterTranslate
  # Detects near-duplicate translation values inside each locale file.
  #
  # Every YAML file is flattened to dotted keys, all pairs of values within the
  # same file are compared with a normalized Levenshtein similarity, and pairs
  # at or above the threshold are written to a JSON report and a plain-text
  # summary in the current working directory.
  class SimilarityAnalyzer
    # Default minimum similarity (0.0..1.0) for a pair to be reported.
    # The threshold was lowered on purpose so that more similar pairs are found.
    SIMILARITY_THRESHOLD = 0.75
    # File name of the detailed JSON report.
    REPORT_FILE = "translation_similarities.json"
    # File name of the human-readable summary.
    SUMMARY_FILE = "translation_similarities_summary.txt"

    # @param yaml_files [Array<String>] paths of the locale files to analyze
    # @param threshold [Float] minimum similarity for a pair to be reported;
    #   defaults to SIMILARITY_THRESHOLD
    def initialize(yaml_files, threshold: SIMILARITY_THRESHOLD)
      @yaml_files = yaml_files
      @threshold = threshold
      @similarities = {}
    end

    # Loads the files, compares their values and writes both reports.
    #
    # @return [nil]
    def analyze
      # Start from a clean slate so repeated calls do not duplicate findings.
      @similarities = {}
      translations_by_language = load_translations
      find_similarities(translations_by_language)
      generate_report
    end

    private

    # Parses every configured file, keyed by its base name without ".yml".
    #
    # @return [Hash{String => Hash}] parsed content per file
    def load_translations
      translations = {}
      puts "Loading YAML files..."

      @yaml_files.each do |file|
        lang_code = File.basename(file, ".yml")
        content = YAML.load_file(file)
        # An empty or scalar-only YAML document does not parse to a Hash;
        # treat it as "no translations" instead of failing later.
        translations[lang_code] = content.is_a?(Hash) ? content : {}
        puts " - Loaded #{lang_code}.yml"
      end

      translations
    end

    # Compares every pair of values within each language and records the pairs
    # whose similarity reaches the threshold.
    #
    # @param translations_by_language [Hash{String => Hash}]
    # @return [void]
    def find_similarities(translations_by_language)
      translations_by_language.each do |lang, translations|
        puts "\nAnalyzing #{lang} translations..."

        # Stringify each value once; blank values carry no text to compare.
        entries = flatten_translations(translations)
                  .map { |key, value| [key, value, value.to_s] }
                  .reject { |_key, _value, text| text.empty? }
        similar_found = 0

        # combination(2) pairs each entry only with the entries after it, so
        # no pair is reported twice.
        entries.combination(2) do |(key1, value1, text1), (key2, value2, text2)|
          similarity = calculate_similarity(text1, text2)
          next unless similarity >= @threshold

          record_similarity(lang, key1, key2, value1, value2, similarity)
          similar_found += 1
        end

        puts " Found #{similar_found} similar translations"
      end
    end

    # Flattens nested hashes into a single hash with dot-separated keys.
    #
    # @param hash [Hash] nested translations (anything else yields no entries)
    # @param prefix [String] dotted key of the enclosing level
    # @param result [Hash] accumulator shared across the recursion
    # @return [Hash{String => Object}] leaf values keyed by dotted path
    def flatten_translations(hash, prefix = "", result = {})
      return result unless hash.is_a?(Hash)

      hash.each do |key, value|
        current_key = prefix.empty? ? key.to_s : "#{prefix}.#{key}"

        if value.is_a?(Hash)
          flatten_translations(value, current_key, result)
        else
          result[current_key] = value
        end
      end

      result
    end

    # Normalized Levenshtein similarity: 1 - distance / length of the longer
    # string.
    #
    # @param str1 [String]
    # @param str2 [String]
    # @return [Float] 1.0 for identical strings (including two empty strings),
    #   0.0 when every character has to change
    def calculate_similarity(str1, str2)
      # Also covers two empty strings, which would otherwise divide by zero.
      return 1.0 if str1 == str2

      longer = str1.chars
      shorter = str2.chars
      # The distance is symmetric; iterating rows over the longer string keeps
      # the two working rows as short as possible.
      longer, shorter = shorter, longer if longer.length < shorter.length

      previous = (0..shorter.length).to_a
      longer.each_with_index do |char_a, i|
        current = [i + 1]
        shorter.each_with_index do |char_b, j|
          cost = char_a == char_b ? 0 : 1
          current << [
            previous[j + 1] + 1, # deletion
            current[j] + 1,      # insertion
            previous[j] + cost   # substitution
          ].min
        end
        previous = current
      end

      1 - (previous.last.to_f / longer.length)
    end

    # Stores one finding for the given language.
    #
    # @return [void]
    def record_similarity(lang, key1, key2, value1, value2, similarity)
      @similarities[lang] ||= []
      @similarities[lang] << {
        key1: key1,
        key2: key2,
        value1: value1,
        value2: value2,
        similarity: similarity.round(2)
      }
    end

    # Writes REPORT_FILE and SUMMARY_FILE into the current working directory.
    #
    # @return [nil]
    def generate_report
      puts "\nGenerating reports..."
      report = {
        generated_at: Time.now.iso8601,
        similarity_threshold: @threshold,
        findings: @similarities
      }

      File.write(REPORT_FILE, JSON.pretty_generate(report))
      puts " - Generated #{REPORT_FILE}"

      File.write(SUMMARY_FILE, generate_summary(report))
      puts " - Generated #{SUMMARY_FILE}"
    end

    # Renders the findings as plain text, one section per language.
    #
    # @param report [Hash] report hash with a :findings entry
    # @return [String]
    def generate_summary(report)
      summary = ["Translation Similarities Report", "=" * 30, ""]

      report[:findings].each do |lang, similarities|
        summary << "Language: #{lang}"
        summary << "-" * 20

        similarities.each do |sim|
          summary << "Similar translations found:"
          summary << " Key 1: #{sim[:key1]}"
          summary << " Value 1: #{sim[:value1]}"
          summary << " Key 2: #{sim[:key2]}"
          summary << " Value 2: #{sim[:value2]}"
          summary << " Similarity: #{(sim[:similarity] * 100).round(1)}%"
          summary << ""
        end

        summary << ""
      end

      summary.join("\n")
    end
  end
end
|
data/lib/better_translate.rb
CHANGED
@@ -7,6 +7,7 @@ require "better_translate/translator"
|
|
7
7
|
require "better_translate/service"
|
8
8
|
require "better_translate/writer"
|
9
9
|
require "better_translate/helper"
|
10
|
+
require "better_translate/similarity_analyzer"
|
10
11
|
|
11
12
|
require 'better_translate/providers/base_provider'
|
12
13
|
require 'better_translate/providers/chatgpt_provider'
|
@@ -0,0 +1,43 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "rails/generators"
|
4
|
+
|
5
|
+
module BetterTranslate
  module Generators
    # Generator behind `rails generate better_translate:analyze`.
    #
    # Collects the `*.yml` files directly under `config/locales`, runs
    # BetterTranslate::SimilarityAnalyzer on them and prints the generated
    # text summary to the console.
    class AnalyzeGenerator < Rails::Generators::Base
      desc "Analyze translation files for similarities"

      # Single generator step: locate the locale files, analyze them and
      # report the outcome.
      def analyze_translations
        say "Starting translation similarity analysis...", :blue

        # Find all YAML files in the locales directory
        locales_path = Rails.root.join("config", "locales")
        locale_files = Dir.glob(locales_path.join("*.yml").to_s)

        if locale_files.length.zero?
          say "No YAML files found in #{locales_path}", :red
          return
        end

        say "Found #{locale_files.size} YAML files to analyze", :green

        # Run analysis
        BetterTranslate::SimilarityAnalyzer.new(locale_files).analyze

        # Show results
        say "\nAnalysis complete!", :green
        say "Reports generated:", :green
        say " * #{BetterTranslate::SimilarityAnalyzer::REPORT_FILE} (detailed JSON report)"
        say " * translation_similarities_summary.txt (human-readable summary)"

        print_summary("translation_similarities_summary.txt")
      end

      private

      # Echoes the text summary to the console when the file exists.
      # Kept private so it is not registered as a generator step of its own.
      def print_summary(path)
        return unless File.exist?(path)

        contents = File.read(path)
        say "\nQuick Summary:", :blue
        say contents
      end
    end
  end
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: better_translate
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.1
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Alessio Bussolari
|
@@ -222,10 +222,12 @@ files:
|
|
222
222
|
- lib/better_translate/providers/chatgpt_provider.rb
|
223
223
|
- lib/better_translate/providers/gemini_provider.rb
|
224
224
|
- lib/better_translate/service.rb
|
225
|
+
- lib/better_translate/similarity_analyzer.rb
|
225
226
|
- lib/better_translate/translator.rb
|
226
227
|
- lib/better_translate/utils.rb
|
227
228
|
- lib/better_translate/version.rb
|
228
229
|
- lib/better_translate/writer.rb
|
230
|
+
- lib/generators/better_translate/analyze_generator.rb
|
229
231
|
- lib/generators/better_translate/install_generator.rb
|
230
232
|
- lib/generators/better_translate/templates/better_translate.rb
|
231
233
|
- lib/generators/better_translate/translate_generator.rb
|