jekyll-chatgpt-translate 0.0.15 → 0.0.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +11 -5
- data/features/cli.feature +5 -4
- data/features/gem_package.feature +1 -1
- data/features/step_definitions/steps.rb +10 -10
- data/jekyll-chatgpt-translate.gemspec +1 -1
- data/lib/jekyll-chatgpt-translate/chatgpt.rb +1 -1
- data/lib/jekyll-chatgpt-translate/generator.rb +13 -12
- data/lib/jekyll-chatgpt-translate/plain.rb +19 -19
- data/lib/jekyll-chatgpt-translate/version.rb +1 -1
- data/test/test_generator.rb +41 -14
- data/test/test_plain.rb +15 -4
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f48ffb6334ada2deb878ede0622065326b23ed765946070aff20c9ce5888715d
|
4
|
+
data.tar.gz: df98890a24fb215b1ba1efc2794a6e4bf1cb0b9949efb94648b267c343052562
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 158df2fdeb7459cace117be39d3e94f9830c852034719ae049777b863bbbc61dc869c362534cc651f53e21af1fa37203865ec7b295254768274921a406cf858f
|
7
|
+
data.tar.gz: f8e34b0bd3432d598b9e7b2f5582ede75231f0c8c9e3dd71db348a5711ddf56106f7685adbf28c02a84f8ef84c7589f4a4909c650afb08d9a110751a317b8bbd
|
data/README.md
CHANGED
@@ -7,7 +7,7 @@ If you have a [Jekyll](https://jekyllrb.com/) static site, this plugin may help
|
|
7
7
|
translate its pages to another language, through [ChatGPT](https://chat.openai.com/). See how it
|
8
8
|
works for [my blog](https://github.com/yegor256/ru.yegor256.com),
|
9
9
|
for example [this page](https://ru.yegor256.com/2023-08-13-dictators.html) is translated to
|
10
|
-
[English]().
|
10
|
+
[English](https://ru.yegor256.com/english/2023-08-13-dictators.html).
|
11
11
|
|
12
12
|
Install it first:
|
13
13
|
|
@@ -18,13 +18,16 @@ gem install jekyll-chatgpt-translate
|
|
18
18
|
Then, add this to `_config.yml`:
|
19
19
|
|
20
20
|
```yaml
|
21
|
+
plugins:
|
22
|
+
- ... your other plugins here ...
|
23
|
+
- jekyll-chatgpt-translate
|
21
24
|
chatgpt-translate:
|
22
25
|
model: gpt-3.5-turbo
|
23
26
|
source: en
|
24
27
|
layout: translated
|
25
28
|
targets:
|
26
29
|
-
|
27
|
-
language:
|
30
|
+
language: zh
|
28
31
|
permalink: :year-:month-:day-:slug-chinese.html
|
29
32
|
layout: chinese-translated
|
30
33
|
-
|
@@ -32,7 +35,7 @@ chatgpt-translate:
|
|
32
35
|
permalink: :year-:month-:day-:title-french.html
|
33
36
|
```
|
34
37
|
|
35
|
-
Here, the source language is English (`en`), the target one is Chinese (`
|
38
|
+
Here, the source language is English (`en`), the target one is Chinese (`zh`),
|
36
39
|
the layout is `_layouts/translated.html` (you must have this file).
|
37
40
|
|
38
41
|
The OpenAI API key must be set in the `OPENAI_API_KEY` environment variable, otherwise
|
@@ -40,9 +43,12 @@ the plugin will not do any translation and won't generate translated pages.
|
|
40
43
|
You can get your key [here](https://help.openai.com/en/articles/4936850-where-do-i-find-my-secret-api-key).
|
41
44
|
|
42
45
|
Inside the original page you can use `{{ page.translated-XX-url }}` in order to render the URL
|
43
|
-
of the translated page, where `XX` is the ISO-
|
46
|
+
of the translated page, where `XX` is the [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes)
|
47
|
+
code of the target language.
|
44
48
|
Inside the translated page you can use `{{ page.translated-original-url }}` in order
|
45
|
-
to get the URL of the page that was translated.
|
49
|
+
to get the URL of the page that was translated.
|
50
|
+
|
51
|
+
You can also use `{{ page.chatgpt-model }}`
|
46
52
|
inside both the original page and the translated one, to refer to the model of ChatGPT.
|
47
53
|
|
48
54
|
## Options
|
data/features/cli.feature
CHANGED
@@ -13,7 +13,7 @@ Feature: Simple site building
|
|
13
13
|
layout: translated
|
14
14
|
targets:
|
15
15
|
-
|
16
|
-
language:
|
16
|
+
language: zh
|
17
17
|
permalink: :year-:month-:day-:slug-chinese.html
|
18
18
|
layout: chinese-translated
|
19
19
|
-
|
@@ -22,7 +22,7 @@ Feature: Simple site building
|
|
22
22
|
"""
|
23
23
|
And I have a "_layouts/default.html" file with content:
|
24
24
|
"""
|
25
|
-
The Chinese: {{ page.translated-
|
25
|
+
The Chinese: {{ page.translated-zh-url }}
|
26
26
|
The French: {{ page.translated-fr-url }}
|
27
27
|
{{ content }}
|
28
28
|
"""
|
@@ -40,8 +40,9 @@ Feature: Simple site building
|
|
40
40
|
Hello, world!
|
41
41
|
"""
|
42
42
|
Then I build Jekyll site
|
43
|
-
And File "_chatgpt-translated/
|
44
|
-
And File "_chatgpt-translated/
|
43
|
+
And File "_chatgpt-translated/zh/2023-01-01-hello-zh.md" exists
|
44
|
+
And File "_chatgpt-translated/zh/2023-01-01-hello-zh.md" contains "/2023-01-01-hello-chinese.html"
|
45
|
+
And File "_chatgpt-translated/zh/2023-01-01-hello-zh.md" contains "translated-language: \"zh\""
|
45
46
|
And File "_site/2023/01/01/hello.html" exists
|
46
47
|
And File "_site/2023/01/01/hello.html" contains "The Chinese: /2023-01-01-hello-chinese.html"
|
47
48
|
And File "_site/2023-01-01-hello-chinese.html" exists
|
@@ -45,18 +45,18 @@ When(/^I build Jekyll site$/) do
|
|
45
45
|
@exitstatus = $CHILD_STATUS.exitstatus
|
46
46
|
end
|
47
47
|
|
48
|
-
Then(
|
49
|
-
raise "STDOUT doesn't contain '#{
|
48
|
+
Then('Stdout contains {string}') do |string|
|
49
|
+
raise "STDOUT doesn't contain '#{string}':\n#{@stdout}" unless @stdout.include?(string)
|
50
50
|
end
|
51
51
|
|
52
|
-
Then(
|
53
|
-
raise "The file \"#{
|
52
|
+
Then('File {string} exists') do |string|
|
53
|
+
raise "The file \"#{string}\" is absent:\n#{`tree -s`}" unless File.exist?(string)
|
54
54
|
end
|
55
55
|
|
56
|
-
Then(
|
57
|
-
raise "The file \"#{
|
58
|
-
content = File.read(
|
59
|
-
raise "The file \"#{
|
56
|
+
Then('File {string} contains {string}') do |string, string2|
|
57
|
+
raise "The file \"#{string}\" is absent" unless File.exist?(string)
|
58
|
+
content = File.read(string)
|
59
|
+
raise "The file \"#{string}\" doesn't contain \"#{string2}\":\n#{content}" unless content.include?(string2)
|
60
60
|
end
|
61
61
|
|
62
62
|
Then(/^Stdout is empty$/) do
|
@@ -85,10 +85,10 @@ When(/^I copy this gem into temp dir$/) do
|
|
85
85
|
FileUtils.copy_entry(@cwd, File.join(@dir, 'jekyll-chatgpt-translate'))
|
86
86
|
end
|
87
87
|
|
88
|
-
Given(
|
88
|
+
Given('It is Unix') do
|
89
89
|
pending if Gem.win_platform?
|
90
90
|
end
|
91
91
|
|
92
|
-
Given(
|
92
|
+
Given('It is Windows') do
|
93
93
|
pending unless Gem.win_platform?
|
94
94
|
end
|
@@ -28,7 +28,7 @@ Gem::Specification.new do |s|
|
|
28
28
|
s.required_rubygems_version = Gem::Requirement.new('>= 0') if s.respond_to? :required_rubygems_version=
|
29
29
|
s.required_ruby_version = '>= 2.6'
|
30
30
|
s.name = 'jekyll-chatgpt-translate'
|
31
|
-
s.version = '0.0.
|
31
|
+
s.version = '0.0.17'
|
32
32
|
s.license = 'MIT'
|
33
33
|
s.summary = 'Translate Jekyll Pages Through ChatGPT'
|
34
34
|
s.description = [
|
@@ -52,7 +52,7 @@ class GptTranslate::ChatGPT
|
|
52
52
|
if par.length <= 32
|
53
53
|
Jekyll.logger.debug("Not translating this, b/c too short: \"#{par}\"")
|
54
54
|
par
|
55
|
-
elsif par !~ /^[
|
55
|
+
elsif par !~ /^[[:alpha:]]/
|
56
56
|
Jekyll.logger.debug("Not translating this, b/c it's not a plain text: \"#{par}\"")
|
57
57
|
par
|
58
58
|
elsif @key.empty?
|
@@ -54,7 +54,8 @@ class GptTranslate::Generator < Jekyll::Generator
|
|
54
54
|
version = config['version'] || GptTranslate::VERSION
|
55
55
|
threshold = config['threshold'] || 1024
|
56
56
|
start = Time.now
|
57
|
-
|
57
|
+
translated = 0
|
58
|
+
copied = 0
|
58
59
|
model = config['model'] || 'gpt-3.5-turbo'
|
59
60
|
marker = "Translated by ChatGPT #{model}/#{version}"
|
60
61
|
site.posts.docs.shuffle.each do |doc|
|
@@ -63,24 +64,24 @@ class GptTranslate::Generator < Jekyll::Generator
|
|
63
64
|
link = GptTranslate::Permalink.new(doc, target['permalink']).to_path
|
64
65
|
lang = target['language']
|
65
66
|
raise 'Language must be defined for each target' if target.nil?
|
66
|
-
if total >= threshold
|
67
|
-
Jekyll.logger.info("Already generated #{total} pages, that's enough for today")
|
68
|
-
break
|
69
|
-
end
|
70
67
|
path = File.join(home, lang, doc.basename.gsub(/\.md$/, "-#{lang}.md"))
|
71
68
|
FileUtils.mkdir_p(File.dirname(path))
|
72
69
|
File.write(path, '') # in order to suppress warnings in Page ctor
|
73
70
|
dest = Jekyll::Page.new(site, site.source, File.dirname(path), File.basename(path)).destination(site.dest)
|
74
71
|
doc.data["translated-#{lang}-url"] = link
|
75
72
|
doc.data['chatgpt-model'] = model
|
76
|
-
|
73
|
+
if GptTranslate::Ping.new(site, link).found?(dest, version)
|
74
|
+
copied += 1
|
75
|
+
next
|
76
|
+
end
|
77
|
+
next if translated >= threshold
|
77
78
|
gpt = GptTranslate::ChatGPT.new(
|
78
79
|
key,
|
79
80
|
model,
|
80
81
|
config['source'] || 'en',
|
81
82
|
lang
|
82
83
|
)
|
83
|
-
|
84
|
+
foreign = gpt.translate(plain)
|
84
85
|
File.write(
|
85
86
|
path,
|
86
87
|
[
|
@@ -90,21 +91,21 @@ class GptTranslate::Generator < Jekyll::Generator
|
|
90
91
|
"description: #{doc['description'].to_json}",
|
91
92
|
"permalink: #{link.to_json}",
|
92
93
|
"translated-original-url: #{doc.url.to_json}",
|
94
|
+
"translated-language: #{lang.to_json}",
|
93
95
|
"chatgpt-model: #{model.to_json}",
|
94
96
|
'---',
|
95
97
|
'',
|
96
|
-
|
98
|
+
foreign,
|
97
99
|
'',
|
98
100
|
"#{marker} on #{Time.now.strftime('%d/%m/%Y %H:%M')}\n{: .jekyll-chatgpt-translate}"
|
99
101
|
].join("\n")
|
100
102
|
)
|
101
103
|
site.pages << Jekyll::Page.new(site, site.source, File.dirname(path), File.basename(path))
|
102
|
-
|
103
|
-
Jekyll.logger.info("Translated via ChatGPT: #{path}")
|
104
|
+
translated += 1
|
105
|
+
Jekyll.logger.info("Translated via ChatGPT: #{path} (#{File.size(path)} bytes)")
|
104
106
|
end
|
105
|
-
break if total >= threshold
|
106
107
|
end
|
107
|
-
Jekyll.logger.info("#{
|
108
|
+
Jekyll.logger.info("#{translated} pages translated and #{copied} pages copied in #{(Time.now - start).round(2)}s")
|
108
109
|
end
|
109
110
|
|
110
111
|
private
|
@@ -40,36 +40,26 @@ class GptTranslate::Plain
|
|
40
40
|
def to_s
|
41
41
|
# To turn compact lists into proper lists
|
42
42
|
@markdown.gsub(/([^\n])\n(\s*\*)/, "\\1\n\n\\2").split(/\n{2,}/).compact.map do |par|
|
43
|
-
par.
|
44
|
-
par.gsub!(/\n+/, ' ')
|
45
|
-
par.gsub!(/ {2,}/, ' ')
|
43
|
+
par.strip!
|
46
44
|
# Liquid tags are removed, but this implementation is primitive
|
47
45
|
# See https://stackoverflow.com/questions/
|
48
46
|
par.gsub!(/{{[^}]+}}/, '')
|
49
47
|
par.gsub!(/{%.+?%}/, '')
|
50
48
|
par.gsub!(/<!--.+?-->/m, '')
|
51
|
-
par.
|
52
|
-
|
49
|
+
par = Redcarpet::Markdown.new(Strip).render(par)
|
50
|
+
par.gsub!("\t", ' ')
|
51
|
+
par.gsub!(/\n+/, ' ') unless par.start_with?('```')
|
52
|
+
par.gsub!(/ {2,}/, ' ') unless par.start_with?('```')
|
53
|
+
par.strip
|
53
54
|
end.join("\n\n").gsub(/\n{2,}/, "\n\n").strip
|
54
55
|
end
|
55
56
|
|
56
|
-
# # To ignore/remove Liquid tags.
|
57
|
-
# class NullDrop < Liquid::Drop
|
58
|
-
# def method_missing(*)
|
59
|
-
# nil
|
60
|
-
# end
|
61
|
-
|
62
|
-
# def respond_to_missing?(*)
|
63
|
-
# true
|
64
|
-
# end
|
65
|
-
# end
|
66
|
-
|
67
57
|
# Markdown to plain text.
|
68
58
|
class Strip < Redcarpet::Render::Base
|
69
59
|
%i[
|
70
60
|
block_code block_quote
|
71
61
|
block_html
|
72
|
-
autolink
|
62
|
+
autolink double_emphasis
|
73
63
|
emphasis underline
|
74
64
|
triple_emphasis strikethrough
|
75
65
|
superscript highlight quote
|
@@ -81,8 +71,18 @@ class GptTranslate::Plain
|
|
81
71
|
end
|
82
72
|
end
|
83
73
|
|
84
|
-
def
|
85
|
-
|
74
|
+
def codespan(content)
|
75
|
+
if content.start_with?("\n")
|
76
|
+
"```#{content}```"
|
77
|
+
elsif content.end_with?("\n")
|
78
|
+
"```\n#{content.split("\n", 2)[1]}```"
|
79
|
+
else
|
80
|
+
content
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
84
|
+
def raw_html(content)
|
85
|
+
content
|
86
86
|
end
|
87
87
|
|
88
88
|
def list(content, _type)
|
data/test/test_generator.rb
CHANGED
@@ -33,19 +33,16 @@ require_relative '../lib/jekyll-chatgpt-translate/generator'
|
|
33
33
|
# License:: MIT
|
34
34
|
class GptTranslate::GeneratorTest < Minitest::Test
|
35
35
|
class FakeSite
|
36
|
-
attr_reader :config
|
36
|
+
attr_reader :config, :pages
|
37
37
|
|
38
|
-
def initialize(config,
|
38
|
+
def initialize(config, docs)
|
39
39
|
@config = config
|
40
|
-
@
|
40
|
+
@docs = docs
|
41
|
+
@pages = []
|
41
42
|
end
|
42
43
|
|
43
44
|
def posts
|
44
|
-
FakePosts.new(@
|
45
|
-
end
|
46
|
-
|
47
|
-
def pages
|
48
|
-
[]
|
45
|
+
FakePosts.new(@docs)
|
49
46
|
end
|
50
47
|
|
51
48
|
def permalink_style
|
@@ -65,7 +62,7 @@ class GptTranslate::GeneratorTest < Minitest::Test
|
|
65
62
|
end
|
66
63
|
|
67
64
|
def dest
|
68
|
-
|
65
|
+
File.dirname(@docs[0])
|
69
66
|
end
|
70
67
|
|
71
68
|
def in_theme_dir(base, _foo = nil, _bar = nil)
|
@@ -118,12 +115,12 @@ class GptTranslate::GeneratorTest < Minitest::Test
|
|
118
115
|
class FakePosts
|
119
116
|
attr_reader :config
|
120
117
|
|
121
|
-
def initialize(
|
122
|
-
@
|
118
|
+
def initialize(docs)
|
119
|
+
@docs = docs
|
123
120
|
end
|
124
121
|
|
125
122
|
def docs
|
126
|
-
|
123
|
+
@docs.map { |d| FakeDocument.new(d) }
|
127
124
|
end
|
128
125
|
end
|
129
126
|
|
@@ -137,18 +134,48 @@ class GptTranslate::GeneratorTest < Minitest::Test
|
|
137
134
|
'chatgpt-translate' => {
|
138
135
|
'targets' => [
|
139
136
|
{
|
140
|
-
'language' => '
|
137
|
+
'language' => 'zh',
|
141
138
|
'layout' => 'chinese',
|
142
139
|
'permalink' => ':slug.html'
|
143
140
|
}
|
144
141
|
]
|
145
142
|
}
|
146
143
|
},
|
147
|
-
|
144
|
+
[post]
|
145
|
+
)
|
146
|
+
gen = GptTranslate::Generator.new
|
147
|
+
stub_request(:get, 'https://www.yegor256.com/.html').to_return(body: '')
|
148
|
+
gen.generate(site)
|
149
|
+
assert_equal(1, site.pages.count)
|
150
|
+
end
|
151
|
+
end
|
152
|
+
|
153
|
+
def test_threshold_stops
|
154
|
+
Dir.mktmpdir do |home|
|
155
|
+
post = File.join(home, '2023-01-01-hello.md')
|
156
|
+
File.write(post, "---\ntitle: Hello\n---\n\nHello, world!")
|
157
|
+
site = FakeSite.new(
|
158
|
+
{
|
159
|
+
'chatgpt-translate' => {
|
160
|
+
'threshold' => 1,
|
161
|
+
'targets' => [
|
162
|
+
{
|
163
|
+
'language' => 'zh',
|
164
|
+
'permalink' => ':slug.html'
|
165
|
+
},
|
166
|
+
{
|
167
|
+
'language' => 'fr',
|
168
|
+
'permalink' => ':year/:slug.html'
|
169
|
+
}
|
170
|
+
]
|
171
|
+
}
|
172
|
+
},
|
173
|
+
[post, post]
|
148
174
|
)
|
149
175
|
gen = GptTranslate::Generator.new
|
150
176
|
stub_request(:get, 'https://www.yegor256.com/.html').to_return(body: '')
|
151
177
|
gen.generate(site)
|
178
|
+
assert_equal(1, site.pages.count)
|
152
179
|
end
|
153
180
|
end
|
154
181
|
end
|
data/test/test_plain.rb
CHANGED
@@ -78,17 +78,28 @@ class GptTranslate::PlainTest < Minitest::Test
|
|
78
78
|
|
79
79
|
def test_code_block
|
80
80
|
assert_equal(
|
81
|
-
"Hello:\n\nJava",
|
81
|
+
"Hello:\n\n```\nJava\n```",
|
82
82
|
GptTranslate::Plain.new("Hello:\n\n```\nJava\n```\n").to_s
|
83
83
|
)
|
84
84
|
end
|
85
85
|
|
86
86
|
def test_html
|
87
87
|
assert_equal(
|
88
|
-
'This is picture:
|
88
|
+
'This is picture: <img src="a"/>!',
|
89
89
|
GptTranslate::Plain.new('This is picture: <img src="a"/>!').to_s
|
90
90
|
)
|
91
|
-
assert_equal('
|
91
|
+
assert_equal('<img src="a"/>', GptTranslate::Plain.new('<img src="a"/>').to_s)
|
92
|
+
end
|
93
|
+
|
94
|
+
def test_big_code
|
95
|
+
assert_equal(
|
96
|
+
"```\nHello\n```",
|
97
|
+
GptTranslate::Plain.new("```\nHello\n```").to_s
|
98
|
+
)
|
99
|
+
assert_equal(
|
100
|
+
"```\nprint('hi!')\n```",
|
101
|
+
GptTranslate::Plain.new("```java\nprint('hi!')\n```").to_s
|
102
|
+
)
|
92
103
|
end
|
93
104
|
|
94
105
|
def test_liquid_tags
|
@@ -97,7 +108,7 @@ class GptTranslate::PlainTest < Minitest::Test
|
|
97
108
|
GptTranslate::Plain.new('Hello, {{ Java }}!').to_s
|
98
109
|
)
|
99
110
|
assert_equal(
|
100
|
-
'Hello,
|
111
|
+
'Hello, dude !',
|
101
112
|
GptTranslate::Plain.new('Hello, {% if a %} dude {% endif %}!').to_s
|
102
113
|
)
|
103
114
|
assert_equal(
|