compose 0.1.6 → 0.1.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/config/prompts/markdown_cleanup.txt +30 -0
- data/lib/compose/ai_client.rb +13 -4
- data/lib/compose/cli.rb +16 -6
- data/lib/compose/config.rb +13 -0
- data/lib/compose/edit_verifier.rb +3 -4
- data/lib/compose/file_processor.rb +67 -7
- data/lib/compose/version.rb +1 -1
- data/lib/compose.rb +1 -0
- metadata +60 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 050edb82f3b985eefdea7e2b791019968f7e8f13770a07652e12e0f273e159db
|
4
|
+
data.tar.gz: 8bce91cfa68da8141cca2ce29a8e9ab25be1051e5d72b93c6376a77bd84f222d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9e07206841afc24ee8daa040c5f11183198072f0e98913da0c5a1ac3d4605b14daf2ee130f93443eeb2f3fc6e7191021ccb1114210471d62353c5011de1fd8de
|
7
|
+
data.tar.gz: e0406eb23a9d5368cd9bc527237c152308716839afe9e649f3837434988dc07f246bbfd06281cbbfc0b62da7b98d2e3944af1783a181487b5c5c10935a967013
|
@@ -0,0 +1,30 @@
|
|
1
|
+
You are tasked with cleaning up a markdown file that was converted from HTML. The file contains unnecessary elements that need to be removed to isolate the main content of the page. Here is the markdown content to clean:
|
2
|
+
|
3
|
+
<markdown>
|
4
|
+
{{MARKDOWN}}
|
5
|
+
</markdown>
|
6
|
+
|
7
|
+
Your goal is to remove all artifacts and unnecessary elements from the markdown, leaving only the main content of the page.
|
8
|
+
|
9
|
+
Common artifacts and unnecessary elements to look for and remove include:
|
10
|
+
|
11
|
+
1. Header and footer sections
|
12
|
+
2. Navigation menus
|
13
|
+
3. Sidebar content
|
14
|
+
4. Social media sharing buttons
|
15
|
+
5. Advertisement blocks
|
16
|
+
6. Copyright notices
|
17
|
+
|
18
|
+
Follow these steps to clean the markdown content:
|
19
|
+
|
20
|
+
1. Identify the main content section of the page. This is typically the largest block of text that contains the primary information.
|
21
|
+
2. If there is no H1 heading, try to identify the main title or heading of the page and format it as an H1 heading.
|
22
|
+
3. Remove any content before the main content section, including headers, navigation menus, and introductory elements.
|
23
|
+
4. Remove any content after the main content section, including footers, related links, and closing elements.
|
24
|
+
5. Ensure that headings are properly formatted with the correct number of `#` symbols.
|
25
|
+
6. Preserve any important links, image references, captions, lists, tables,
|
26
|
+
code blocks etc. that are part of the main content.
|
27
|
+
|
28
|
+
Be careful not to remove any part of the main content while cleaning up the markdown.
|
29
|
+
|
30
|
+
Once you have cleaned the markdown content, provide only the cleaned-up markdown. Ensure that only the main content of the page remains, formatted correctly in markdown. Do not surround the response with backticks.
|
data/lib/compose/ai_client.rb
CHANGED
@@ -17,7 +17,7 @@ module Compose
|
|
17
17
|
response.dig('content', 0, 'text')
|
18
18
|
when 'openai'
|
19
19
|
response = openai_chat(system_prompt, user_prompt, temperature: temperature)
|
20
|
-
response
|
20
|
+
response
|
21
21
|
else
|
22
22
|
raise "Unsupported model: #{@model[:name]}"
|
23
23
|
end
|
@@ -44,18 +44,27 @@ module Compose
|
|
44
44
|
end
|
45
45
|
|
46
46
|
def openai_chat(system_prompt, user_prompt, temperature: 0)
|
47
|
-
Whirly.start spinner: 'dots', status: '
|
47
|
+
Whirly.start spinner: 'dots', status: 'Received 0 chunks'
|
48
|
+
token_count = 0
|
49
|
+
response = ""
|
48
50
|
client = OpenAI::Client.new(access_token: ENV['OPENAI_API_KEY'])
|
49
|
-
|
51
|
+
client.chat(
|
50
52
|
parameters: {
|
51
53
|
model: @model[:name],
|
52
54
|
messages: [
|
53
55
|
{ role: 'system', content: system_prompt },
|
54
56
|
{ role: 'user', content: user_prompt }
|
55
57
|
],
|
56
|
-
temperature: temperature
|
58
|
+
temperature: temperature,
|
59
|
+
stream: Proc.new { |chunk|
|
60
|
+
token_count += 1
|
61
|
+
Whirly.status = "Received #{token_count} chunks"
|
62
|
+
content = chunk.dig("choices", 0, "delta", "content")
|
63
|
+
response += content if content
|
64
|
+
}
|
57
65
|
}
|
58
66
|
)
|
67
|
+
|
59
68
|
Whirly.stop
|
60
69
|
response
|
61
70
|
end
|
data/lib/compose/cli.rb
CHANGED
@@ -8,6 +8,7 @@ module Compose
|
|
8
8
|
desc 'edit [FILES]', 'Edit files using AI assistance'
|
9
9
|
method_option :model, type: :string, default: 'sonnet35', desc: 'AI model to use'
|
10
10
|
method_option :include_imports, type: :boolean, default: true, aliases: '-a', desc: 'Include import statements when processing files'
|
11
|
+
method_option :clear_cache, type: :boolean, default: false, aliases: '-c', desc: 'Clear the cache before processing files'
|
11
12
|
def edit(*files)
|
12
13
|
puts 'Welcome to Compose!'.green
|
13
14
|
|
@@ -16,12 +17,21 @@ module Compose
|
|
16
17
|
exit(1)
|
17
18
|
end
|
18
19
|
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
20
|
+
if options[:clear_cache]
|
21
|
+
FileUtils.rm_rf(FileProcessor::CACHE_DIR)
|
22
|
+
puts 'Cache cleared'.green
|
23
|
+
end
|
24
|
+
|
25
|
+
# Set up configuration
|
26
|
+
Compose::Config.configure do |config|
|
27
|
+
config.code_model = Model.find(options[:model])
|
28
|
+
config.verifier_model = Model.preferred_verifier_model
|
29
|
+
end
|
30
|
+
|
31
|
+
ApiKeyUtils.setup(Compose::Config.code_model)
|
32
|
+
ApiKeyUtils.setup(Compose::Config.verifier_model)
|
23
33
|
|
24
|
-
file_processor = FileProcessor.new(files, include_imports: options[:include_imports]
|
34
|
+
file_processor = FileProcessor.new(files, include_imports: options[:include_imports])
|
25
35
|
puts "Loaded #{file_processor.files.count} file(s): #{file_processor.files.keys.map { |path| File.exist?(path) ? Pathname.new(path).relative_path_from(Pathname.pwd).to_s : path }.join(', ')}"
|
26
36
|
|
27
37
|
task = ask('What do you need me to do? (Type \'ask\' followed by your question to ask a question instead):')
|
@@ -33,7 +43,7 @@ module Compose
|
|
33
43
|
else
|
34
44
|
edits = file_processor.edit_files(task)
|
35
45
|
|
36
|
-
edit_verifier = EditVerifier.new(edits
|
46
|
+
edit_verifier = EditVerifier.new(edits)
|
37
47
|
verified_edits = edit_verifier.verify_edits
|
38
48
|
|
39
49
|
edit_processor = EditProcessor.new(verified_edits)
|
@@ -2,10 +2,9 @@
|
|
2
2
|
|
3
3
|
module Compose
|
4
4
|
class EditVerifier
|
5
|
-
attr_reader :
|
5
|
+
attr_reader :edits
|
6
6
|
|
7
|
-
def initialize(edits
|
8
|
-
@ai_client = AIClient.new(model)
|
7
|
+
def initialize(edits)
|
9
8
|
@edits = edits
|
10
9
|
end
|
11
10
|
|
@@ -33,7 +32,7 @@ module Compose
|
|
33
32
|
file_content = FileProcessor.load_file(File.expand_path(edit[:filename]))
|
34
33
|
system_prompt = "CODE:\n#{file_content}\n"
|
35
34
|
|
36
|
-
response =
|
35
|
+
response = AIClient.new(Compose::Config.verifier_model).chat(system_prompt, prompt)
|
37
36
|
|
38
37
|
begin
|
39
38
|
verified_change = JSON.parse(response)
|
@@ -2,9 +2,16 @@
|
|
2
2
|
|
3
3
|
require 'find'
|
4
4
|
require 'pathname'
|
5
|
+
require 'selenium-webdriver'
|
6
|
+
require 'nokogiri'
|
7
|
+
require 'reverse_markdown'
|
8
|
+
require 'digest/md5'
|
9
|
+
require 'fileutils'
|
5
10
|
|
6
11
|
module Compose
|
7
12
|
class FileProcessor
|
13
|
+
CACHE_DIR = File.join(ENV['HOME'], '.cache', 'compose')
|
14
|
+
|
8
15
|
attr_reader :ai_client, :files, :tokens
|
9
16
|
|
10
17
|
def self.load_file(file_path, include_imports: true)
|
@@ -20,12 +27,13 @@ module Compose
|
|
20
27
|
"<#{relative_path}>\n#{processed_content}\n</#{relative_path}>"
|
21
28
|
end
|
22
29
|
|
23
|
-
def initialize(input_files, include_imports: true
|
24
|
-
@ai_client = AIClient.new(model)
|
30
|
+
def initialize(input_files, include_imports: true)
|
25
31
|
@files = {}
|
26
32
|
|
27
33
|
input_files.each do |input|
|
28
|
-
if
|
34
|
+
if input.start_with?('http://', 'https://')
|
35
|
+
@files[input] = process_url(input)
|
36
|
+
elsif File.directory?(input)
|
29
37
|
find_files_in_directory(input).each do |file|
|
30
38
|
@files[File.expand_path(file)] = nil
|
31
39
|
end
|
@@ -36,8 +44,8 @@ module Compose
|
|
36
44
|
end
|
37
45
|
end
|
38
46
|
|
39
|
-
@files.each do |
|
40
|
-
@files[
|
47
|
+
@files.each do |path, _|
|
48
|
+
@files[path] ||= FileProcessor.load_file(path, include_imports: include_imports)
|
41
49
|
end
|
42
50
|
|
43
51
|
self
|
@@ -52,7 +60,7 @@ module Compose
|
|
52
60
|
prompt.gsub!('{{QUESTION}}', question)
|
53
61
|
system_prompt = "CODE:\n#{content}\n"
|
54
62
|
|
55
|
-
|
63
|
+
AIClient.new(Compose::Config.code_model).chat(system_prompt, prompt)
|
56
64
|
end
|
57
65
|
|
58
66
|
def edit_files(task)
|
@@ -60,7 +68,7 @@ module Compose
|
|
60
68
|
prompt.gsub!('{{TASK}}', task)
|
61
69
|
system_prompt = "CODE:\n#{content}\n"
|
62
70
|
|
63
|
-
response =
|
71
|
+
response = AIClient.new(Compose::Config.code_model).chat(system_prompt, prompt)
|
64
72
|
|
65
73
|
begin
|
66
74
|
edits = JSON.parse(response)['edits']
|
@@ -97,5 +105,57 @@ module Compose
|
|
97
105
|
rescue
|
98
106
|
false
|
99
107
|
end
|
108
|
+
|
109
|
+
def process_url(url)
|
110
|
+
cached_file = File.join(CACHE_DIR, Digest::MD5.hexdigest(url))
|
111
|
+
content = if File.exist?(cached_file)
|
112
|
+
File.read(cached_file)
|
113
|
+
else
|
114
|
+
content = fetch_and_convert_url(url)
|
115
|
+
return if content.nil?
|
116
|
+
|
117
|
+
FileUtils.mkdir_p(File.dirname(cached_file))
|
118
|
+
File.write(cached_file, content)
|
119
|
+
content
|
120
|
+
end
|
121
|
+
return if content.nil?
|
122
|
+
|
123
|
+
"<url src='#{url}'>\n#{content}\n</url>"
|
124
|
+
end
|
125
|
+
|
126
|
+
def fetch_and_convert_url(url)
|
127
|
+
options = Selenium::WebDriver::Chrome::Options.new
|
128
|
+
options.add_argument('--headless')
|
129
|
+
options.add_argument('--disable-gpu')
|
130
|
+
options.add_argument('--no-sandbox')
|
131
|
+
options.add_argument('--disable-dev-shm-usage')
|
132
|
+
options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")
|
133
|
+
|
134
|
+
driver = Selenium::WebDriver.for :chrome, options: options
|
135
|
+
|
136
|
+
begin
|
137
|
+
driver.get(url)
|
138
|
+
html_content = driver.page_source
|
139
|
+
doc = Nokogiri::HTML(html_content)
|
140
|
+
doc.search('script, style').remove
|
141
|
+
|
142
|
+
parsed_markdown = ReverseMarkdown.convert(doc.to_html, unknown_tags: :bypass)
|
143
|
+
normalized_markdown = parsed_markdown.gsub(/ +/, ' ').strip
|
144
|
+
|
145
|
+
# Remove content before the first h1 in markdown
|
146
|
+
first_h1_index = normalized_markdown.index(/^# /)
|
147
|
+
if first_h1_index
|
148
|
+
normalized_markdown = normalized_markdown[first_h1_index..-1]
|
149
|
+
end
|
150
|
+
|
151
|
+
prompt = File.read(File.expand_path("../../config/prompts/markdown_cleanup.txt", __dir__))
|
152
|
+
prompt.gsub!('{{MARKDOWN}}', normalized_markdown)
|
153
|
+
cleaned_markdown = AIClient.new(Compose::Config.verifier_model).chat("", prompt)
|
154
|
+
|
155
|
+
cleaned_markdown
|
156
|
+
ensure
|
157
|
+
driver.quit
|
158
|
+
end
|
159
|
+
end
|
100
160
|
end
|
101
161
|
end
|
data/lib/compose/version.rb
CHANGED
data/lib/compose.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: compose
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dreaming Tulpa
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-09-
|
11
|
+
date: 2024-09-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ruby-openai
|
@@ -164,6 +164,62 @@ dependencies:
|
|
164
164
|
- - ">="
|
165
165
|
- !ruby/object:Gem::Version
|
166
166
|
version: '0'
|
167
|
+
- !ruby/object:Gem::Dependency
|
168
|
+
name: selenium-webdriver
|
169
|
+
requirement: !ruby/object:Gem::Requirement
|
170
|
+
requirements:
|
171
|
+
- - ">="
|
172
|
+
- !ruby/object:Gem::Version
|
173
|
+
version: '0'
|
174
|
+
type: :runtime
|
175
|
+
prerelease: false
|
176
|
+
version_requirements: !ruby/object:Gem::Requirement
|
177
|
+
requirements:
|
178
|
+
- - ">="
|
179
|
+
- !ruby/object:Gem::Version
|
180
|
+
version: '0'
|
181
|
+
- !ruby/object:Gem::Dependency
|
182
|
+
name: nokogiri
|
183
|
+
requirement: !ruby/object:Gem::Requirement
|
184
|
+
requirements:
|
185
|
+
- - ">="
|
186
|
+
- !ruby/object:Gem::Version
|
187
|
+
version: '0'
|
188
|
+
type: :runtime
|
189
|
+
prerelease: false
|
190
|
+
version_requirements: !ruby/object:Gem::Requirement
|
191
|
+
requirements:
|
192
|
+
- - ">="
|
193
|
+
- !ruby/object:Gem::Version
|
194
|
+
version: '0'
|
195
|
+
- !ruby/object:Gem::Dependency
|
196
|
+
name: reverse_markdown
|
197
|
+
requirement: !ruby/object:Gem::Requirement
|
198
|
+
requirements:
|
199
|
+
- - ">="
|
200
|
+
- !ruby/object:Gem::Version
|
201
|
+
version: '0'
|
202
|
+
type: :runtime
|
203
|
+
prerelease: false
|
204
|
+
version_requirements: !ruby/object:Gem::Requirement
|
205
|
+
requirements:
|
206
|
+
- - ">="
|
207
|
+
- !ruby/object:Gem::Version
|
208
|
+
version: '0'
|
209
|
+
- !ruby/object:Gem::Dependency
|
210
|
+
name: digest
|
211
|
+
requirement: !ruby/object:Gem::Requirement
|
212
|
+
requirements:
|
213
|
+
- - ">="
|
214
|
+
- !ruby/object:Gem::Version
|
215
|
+
version: '0'
|
216
|
+
type: :runtime
|
217
|
+
prerelease: false
|
218
|
+
version_requirements: !ruby/object:Gem::Requirement
|
219
|
+
requirements:
|
220
|
+
- - ">="
|
221
|
+
- !ruby/object:Gem::Version
|
222
|
+
version: '0'
|
167
223
|
- !ruby/object:Gem::Dependency
|
168
224
|
name: bundler
|
169
225
|
requirement: !ruby/object:Gem::Requirement
|
@@ -218,12 +274,14 @@ files:
|
|
218
274
|
- bin/compose
|
219
275
|
- config/models.yml
|
220
276
|
- config/prompts/ask.txt
|
277
|
+
- config/prompts/markdown_cleanup.txt
|
221
278
|
- config/prompts/task.txt
|
222
279
|
- config/prompts/verify.txt
|
223
280
|
- lib/compose.rb
|
224
281
|
- lib/compose/ai_client.rb
|
225
282
|
- lib/compose/api_key_utils.rb
|
226
283
|
- lib/compose/cli.rb
|
284
|
+
- lib/compose/config.rb
|
227
285
|
- lib/compose/edit_processor.rb
|
228
286
|
- lib/compose/edit_verifier.rb
|
229
287
|
- lib/compose/file_processor.rb
|