compose 0.1.7 → 0.1.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/config/prompts/markdown_cleanup.txt +30 -0
- data/lib/compose/ai_client.rb +13 -4
- data/lib/compose/cli.rb +16 -6
- data/lib/compose/config.rb +13 -0
- data/lib/compose/edit_verifier.rb +3 -4
- data/lib/compose/file_processor.rb +45 -16
- data/lib/compose/version.rb +1 -1
- data/lib/compose.rb +1 -0
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 050edb82f3b985eefdea7e2b791019968f7e8f13770a07652e12e0f273e159db
|
4
|
+
data.tar.gz: 8bce91cfa68da8141cca2ce29a8e9ab25be1051e5d72b93c6376a77bd84f222d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9e07206841afc24ee8daa040c5f11183198072f0e98913da0c5a1ac3d4605b14daf2ee130f93443eeb2f3fc6e7191021ccb1114210471d62353c5011de1fd8de
|
7
|
+
data.tar.gz: e0406eb23a9d5368cd9bc527237c152308716839afe9e649f3837434988dc07f246bbfd06281cbbfc0b62da7b98d2e3944af1783a181487b5c5c10935a967013
|
data/config/prompts/markdown_cleanup.txt
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
You are tasked with cleaning up a markdown file that was converted from HTML. The file contains unnecessary elements that need to be removed to isolate the main content of the page. Here is the markdown content to clean:
|
2
|
+
|
3
|
+
<markdown>
|
4
|
+
{{MARKDOWN}}
|
5
|
+
</markdown>
|
6
|
+
|
7
|
+
Your goal is to remove all artifacts and unnecessary elements from the markdown, leaving only the main content of the page.
|
8
|
+
|
9
|
+
Common artifacts and unnecessary elements to look for and remove include:
|
10
|
+
|
11
|
+
1. Header and footer sections
|
12
|
+
2. Navigation menus
|
13
|
+
3. Sidebar content
|
14
|
+
4. Social media sharing buttons
|
15
|
+
5. Advertisement blocks
|
16
|
+
6. Copyright notices
|
17
|
+
|
18
|
+
Follow these steps to clean the markdown content:
|
19
|
+
|
20
|
+
1. Identify the main content section of the page. This is typically the largest block of text that contains the primary information.
|
21
|
+
2. If there is no H1 heading, try to identify the main title or heading of the page and format it as an H1 heading.
|
22
|
+
3. Remove any content before the main content section, including headers, navigation menus, and introductory elements.
|
23
|
+
4. Remove any content after the main content section, including footers, related links, and closing elements.
|
24
|
+
5. Ensure that headings are properly formatted with the correct number of `#` symbols.
|
25
|
+
6. Preserve any important links, image references, captions, lists, tables,
|
26
|
+
code blocks etc. that are part of the main content.
|
27
|
+
|
28
|
+
Be careful not to remove any part of the main content while cleaning up the markdown.
|
29
|
+
|
30
|
+
Once you have cleaned the markdown content, provide only the cleanup markdown. Ensure that only the main content of the page remains, formatted correctly in markdown. Do not surround the response with backticks.
|
data/lib/compose/ai_client.rb
CHANGED
@@ -17,7 +17,7 @@ module Compose
|
|
17
17
|
response.dig('content', 0, 'text')
|
18
18
|
when 'openai'
|
19
19
|
response = openai_chat(system_prompt, user_prompt, temperature: temperature)
|
20
|
-
response
|
20
|
+
response
|
21
21
|
else
|
22
22
|
raise "Unsupported model: #{@model[:name]}"
|
23
23
|
end
|
@@ -44,18 +44,27 @@ module Compose
|
|
44
44
|
end
|
45
45
|
|
46
46
|
def openai_chat(system_prompt, user_prompt, temperature: 0)
|
47
|
-
Whirly.start spinner: 'dots', status: '
|
47
|
+
Whirly.start spinner: 'dots', status: 'Received 0 chunks'
|
48
|
+
token_count = 0
|
49
|
+
response = ""
|
48
50
|
client = OpenAI::Client.new(access_token: ENV['OPENAI_API_KEY'])
|
49
|
-
|
51
|
+
client.chat(
|
50
52
|
parameters: {
|
51
53
|
model: @model[:name],
|
52
54
|
messages: [
|
53
55
|
{ role: 'system', content: system_prompt },
|
54
56
|
{ role: 'user', content: user_prompt }
|
55
57
|
],
|
56
|
-
temperature: temperature
|
58
|
+
temperature: temperature,
|
59
|
+
stream: Proc.new { |chunk|
|
60
|
+
token_count += 1
|
61
|
+
Whirly.status = "Received #{token_count} chunks"
|
62
|
+
content = chunk.dig("choices", 0, "delta", "content")
|
63
|
+
response += content if content
|
64
|
+
}
|
57
65
|
}
|
58
66
|
)
|
67
|
+
|
59
68
|
Whirly.stop
|
60
69
|
response
|
61
70
|
end
|
data/lib/compose/cli.rb
CHANGED
@@ -8,6 +8,7 @@ module Compose
|
|
8
8
|
desc 'edit [FILES]', 'Edit files using AI assistance'
|
9
9
|
method_option :model, type: :string, default: 'sonnet35', desc: 'AI model to use'
|
10
10
|
method_option :include_imports, type: :boolean, default: true, aliases: '-a', desc: 'Include import statements when processing files'
|
11
|
+
method_option :clear_cache, type: :boolean, default: false, aliases: '-c', desc: 'Clear the cache before processing files'
|
11
12
|
def edit(*files)
|
12
13
|
puts 'Welcome to Compose!'.green
|
13
14
|
|
@@ -16,12 +17,21 @@ module Compose
|
|
16
17
|
exit(1)
|
17
18
|
end
|
18
19
|
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
20
|
+
if options[:clear_cache]
|
21
|
+
FileUtils.rm_rf(FileProcessor::CACHE_DIR)
|
22
|
+
puts 'Cache cleared'.green
|
23
|
+
end
|
24
|
+
|
25
|
+
# Set up configuration
|
26
|
+
Compose::Config.configure do |config|
|
27
|
+
config.code_model = Model.find(options[:model])
|
28
|
+
config.verifier_model = Model.preferred_verifier_model
|
29
|
+
end
|
30
|
+
|
31
|
+
ApiKeyUtils.setup(Compose::Config.code_model)
|
32
|
+
ApiKeyUtils.setup(Compose::Config.verifier_model)
|
23
33
|
|
24
|
-
file_processor = FileProcessor.new(files, include_imports: options[:include_imports]
|
34
|
+
file_processor = FileProcessor.new(files, include_imports: options[:include_imports])
|
25
35
|
puts "Loaded #{file_processor.files.count} file(s): #{file_processor.files.keys.map { |path| File.exist?(path) ? Pathname.new(path).relative_path_from(Pathname.pwd).to_s : path }.join(', ')}"
|
26
36
|
|
27
37
|
task = ask('What do you need me to do? (Type \'ask\' followed by your question to ask a question instead):')
|
@@ -33,7 +43,7 @@ module Compose
|
|
33
43
|
else
|
34
44
|
edits = file_processor.edit_files(task)
|
35
45
|
|
36
|
-
edit_verifier = EditVerifier.new(edits
|
46
|
+
edit_verifier = EditVerifier.new(edits)
|
37
47
|
verified_edits = edit_verifier.verify_edits
|
38
48
|
|
39
49
|
edit_processor = EditProcessor.new(verified_edits)
|
data/lib/compose/edit_verifier.rb
CHANGED
@@ -2,10 +2,9 @@
|
|
2
2
|
|
3
3
|
module Compose
|
4
4
|
class EditVerifier
|
5
|
-
attr_reader :
|
5
|
+
attr_reader :edits
|
6
6
|
|
7
|
-
def initialize(edits
|
8
|
-
@ai_client = AIClient.new(model)
|
7
|
+
def initialize(edits)
|
9
8
|
@edits = edits
|
10
9
|
end
|
11
10
|
|
@@ -33,7 +32,7 @@ module Compose
|
|
33
32
|
file_content = FileProcessor.load_file(File.expand_path(edit[:filename]))
|
34
33
|
system_prompt = "CODE:\n#{file_content}\n"
|
35
34
|
|
36
|
-
response =
|
35
|
+
response = AIClient.new(Compose::Config.verifier_model).chat(system_prompt, prompt)
|
37
36
|
|
38
37
|
begin
|
39
38
|
verified_change = JSON.parse(response)
|
data/lib/compose/file_processor.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
require 'find'
|
4
4
|
require 'pathname'
|
5
|
-
require '
|
5
|
+
require 'selenium-webdriver'
|
6
6
|
require 'nokogiri'
|
7
7
|
require 'reverse_markdown'
|
8
8
|
require 'digest/md5'
|
@@ -10,6 +10,8 @@ require 'fileutils'
|
|
10
10
|
|
11
11
|
module Compose
|
12
12
|
class FileProcessor
|
13
|
+
CACHE_DIR = File.join(ENV['HOME'], '.cache', 'compose')
|
14
|
+
|
13
15
|
attr_reader :ai_client, :files, :tokens
|
14
16
|
|
15
17
|
def self.load_file(file_path, include_imports: true)
|
@@ -25,8 +27,7 @@ module Compose
|
|
25
27
|
"<#{relative_path}>\n#{processed_content}\n</#{relative_path}>"
|
26
28
|
end
|
27
29
|
|
28
|
-
def initialize(input_files, include_imports: true
|
29
|
-
@ai_client = AIClient.new(model)
|
30
|
+
def initialize(input_files, include_imports: true)
|
30
31
|
@files = {}
|
31
32
|
|
32
33
|
input_files.each do |input|
|
@@ -34,15 +35,19 @@ module Compose
|
|
34
35
|
@files[input] = process_url(input)
|
35
36
|
elsif File.directory?(input)
|
36
37
|
find_files_in_directory(input).each do |file|
|
37
|
-
@files[File.expand_path(file)] =
|
38
|
+
@files[File.expand_path(file)] = nil
|
38
39
|
end
|
39
40
|
elsif File.file?(input)
|
40
|
-
@files[File.expand_path(input)] =
|
41
|
+
@files[File.expand_path(input)] = nil
|
41
42
|
else
|
42
43
|
puts "Skipping invalid input: #{input}".yellow
|
43
44
|
end
|
44
45
|
end
|
45
46
|
|
47
|
+
@files.each do |path, _|
|
48
|
+
@files[path] ||= FileProcessor.load_file(path, include_imports: include_imports)
|
49
|
+
end
|
50
|
+
|
46
51
|
self
|
47
52
|
end
|
48
53
|
|
@@ -55,7 +60,7 @@ module Compose
|
|
55
60
|
prompt.gsub!('{{QUESTION}}', question)
|
56
61
|
system_prompt = "CODE:\n#{content}\n"
|
57
62
|
|
58
|
-
|
63
|
+
AIClient.new(Compose::Config.code_model).chat(system_prompt, prompt)
|
59
64
|
end
|
60
65
|
|
61
66
|
def edit_files(task)
|
@@ -63,7 +68,7 @@ module Compose
|
|
63
68
|
prompt.gsub!('{{TASK}}', task)
|
64
69
|
system_prompt = "CODE:\n#{content}\n"
|
65
70
|
|
66
|
-
response =
|
71
|
+
response = AIClient.new(Compose::Config.code_model).chat(system_prompt, prompt)
|
67
72
|
|
68
73
|
begin
|
69
74
|
edits = JSON.parse(response)['edits']
|
@@ -102,7 +107,7 @@ module Compose
|
|
102
107
|
end
|
103
108
|
|
104
109
|
def process_url(url)
|
105
|
-
cached_file = File.join(
|
110
|
+
cached_file = File.join(CACHE_DIR, Digest::MD5.hexdigest(url))
|
106
111
|
content = if File.exist?(cached_file)
|
107
112
|
File.read(cached_file)
|
108
113
|
else
|
@@ -119,14 +124,38 @@ module Compose
|
|
119
124
|
end
|
120
125
|
|
121
126
|
def fetch_and_convert_url(url)
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
127
|
+
options = Selenium::WebDriver::Chrome::Options.new
|
128
|
+
options.add_argument('--headless')
|
129
|
+
options.add_argument('--disable-gpu')
|
130
|
+
options.add_argument('--no-sandbox')
|
131
|
+
options.add_argument('--disable-dev-shm-usage')
|
132
|
+
options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")
|
133
|
+
|
134
|
+
driver = Selenium::WebDriver.for :chrome, options: options
|
135
|
+
|
136
|
+
begin
|
137
|
+
driver.get(url)
|
138
|
+
html_content = driver.page_source
|
139
|
+
doc = Nokogiri::HTML(html_content)
|
140
|
+
doc.search('script, style').remove
|
141
|
+
|
142
|
+
parsed_markdown = ReverseMarkdown.convert(doc.to_html, unknown_tags: :bypass)
|
143
|
+
normalized_markdown = parsed_markdown.gsub(/ +/, ' ').strip
|
144
|
+
|
145
|
+
# Remove content before the first h1 in markdown
|
146
|
+
first_h1_index = normalized_markdown.index(/^# /)
|
147
|
+
if first_h1_index
|
148
|
+
normalized_markdown = normalized_markdown[first_h1_index..-1]
|
149
|
+
end
|
150
|
+
|
151
|
+
prompt = File.read(File.expand_path("../../config/prompts/markdown_cleanup.txt", __dir__))
|
152
|
+
prompt.gsub!('{{MARKDOWN}}', normalized_markdown)
|
153
|
+
cleaned_markdown = AIClient.new(Compose::Config.verifier_model).chat("", prompt)
|
154
|
+
|
155
|
+
cleaned_markdown
|
156
|
+
ensure
|
157
|
+
driver.quit
|
158
|
+
end
|
130
159
|
end
|
131
160
|
end
|
132
161
|
end
|
data/lib/compose/version.rb
CHANGED
data/lib/compose.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: compose
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dreaming Tulpa
|
@@ -165,7 +165,7 @@ dependencies:
|
|
165
165
|
- !ruby/object:Gem::Version
|
166
166
|
version: '0'
|
167
167
|
- !ruby/object:Gem::Dependency
|
168
|
-
name:
|
168
|
+
name: selenium-webdriver
|
169
169
|
requirement: !ruby/object:Gem::Requirement
|
170
170
|
requirements:
|
171
171
|
- - ">="
|
@@ -274,12 +274,14 @@ files:
|
|
274
274
|
- bin/compose
|
275
275
|
- config/models.yml
|
276
276
|
- config/prompts/ask.txt
|
277
|
+
- config/prompts/markdown_cleanup.txt
|
277
278
|
- config/prompts/task.txt
|
278
279
|
- config/prompts/verify.txt
|
279
280
|
- lib/compose.rb
|
280
281
|
- lib/compose/ai_client.rb
|
281
282
|
- lib/compose/api_key_utils.rb
|
282
283
|
- lib/compose/cli.rb
|
284
|
+
- lib/compose/config.rb
|
283
285
|
- lib/compose/edit_processor.rb
|
284
286
|
- lib/compose/edit_verifier.rb
|
285
287
|
- lib/compose/file_processor.rb
|