compose 0.1.6 → 0.1.8

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: '0181e80bc473a0e67c074cc18b299be97ecad391875a7578780a3f8a93286cdc'
4
- data.tar.gz: a4d171f396fd690387c122958565eb047efd3b0ef11c99acd6fc6e557a78f1e4
3
+ metadata.gz: 050edb82f3b985eefdea7e2b791019968f7e8f13770a07652e12e0f273e159db
4
+ data.tar.gz: 8bce91cfa68da8141cca2ce29a8e9ab25be1051e5d72b93c6376a77bd84f222d
5
5
  SHA512:
6
- metadata.gz: 172b6babed118da8b989bffb8dad311eef02a27c21200df782fccf29783d929559ca44e98023c4ac2e3607e1ece53a36e7490834c9fc09aaffe82aaf8dfc94ec
7
- data.tar.gz: 89b5cd472edfe13a5007e753d23bca1be694ae056314f71376f442af323de21e1bdcf395c0560445534172c22a7304185df21d0b1835c7e65f05c26c325bf050
6
+ metadata.gz: 9e07206841afc24ee8daa040c5f11183198072f0e98913da0c5a1ac3d4605b14daf2ee130f93443eeb2f3fc6e7191021ccb1114210471d62353c5011de1fd8de
7
+ data.tar.gz: e0406eb23a9d5368cd9bc527237c152308716839afe9e649f3837434988dc07f246bbfd06281cbbfc0b62da7b98d2e3944af1783a181487b5c5c10935a967013
@@ -0,0 +1,30 @@
1
+ You are tasked with cleaning up a markdown file that was converted from HTML. The file contains unnecessary elements that need to be removed to isolate the main content of the page. Here is the markdown content to clean:
2
+
3
+ <markdown>
4
+ {{MARKDOWN}}
5
+ </markdown>
6
+
7
+ Your goal is to remove all artifacts and unnecessary elements from the markdown, leaving only the main content of the page.
8
+
9
+ Common artifacts and unnecessary elements to look for and remove include:
10
+
11
+ 1. Header and footer sections
12
+ 2. Navigation menus
13
+ 3. Sidebar content
14
+ 4. Social media sharing buttons
15
+ 5. Advertisement blocks
16
+ 6. Copyright notices
17
+
18
+ Follow these steps to clean the markdown content:
19
+
20
+ 1. Identify the main content section of the page. This is typically the largest block of text that contains the primary information.
21
+ 2. If there is no H1 heading, try to identify the main title or heading of the page and format it as an H1 heading.
22
+ 3. Remove any content before the main content section, including headers, navigation menus, and introductory elements.
23
+ 4. Remove any content after the main content section, including footers, related links, and closing elements.
24
+ 5. Ensure that headings are properly formatted with the correct number of `#` symbols.
25
+ 6. Preserve any important links, image references, captions, lists, tables,
26
+ code blocks etc. that are part of the main content.
27
+
28
+ Be careful not to remove any part of the main content while cleaning up the markdown.
29
+
30
+ Once you have cleaned the markdown content, provide only the cleaned-up markdown. Ensure that only the main content of the page remains, formatted correctly in markdown. Do not surround the response with backticks.
@@ -17,7 +17,7 @@ module Compose
17
17
  response.dig('content', 0, 'text')
18
18
  when 'openai'
19
19
  response = openai_chat(system_prompt, user_prompt, temperature: temperature)
20
- response.dig('choices', 0, 'message', 'content')
20
+ response
21
21
  else
22
22
  raise "Unsupported model: #{@model[:name]}"
23
23
  end
@@ -44,18 +44,27 @@ module Compose
44
44
  end
45
45
 
46
46
  def openai_chat(system_prompt, user_prompt, temperature: 0)
47
- Whirly.start spinner: 'dots', status: 'Verifying change'
47
+ Whirly.start spinner: 'dots', status: 'Received 0 chunks'
48
+ token_count = 0
49
+ response = ""
48
50
  client = OpenAI::Client.new(access_token: ENV['OPENAI_API_KEY'])
49
- response = client.chat(
51
+ client.chat(
50
52
  parameters: {
51
53
  model: @model[:name],
52
54
  messages: [
53
55
  { role: 'system', content: system_prompt },
54
56
  { role: 'user', content: user_prompt }
55
57
  ],
56
- temperature: temperature
58
+ temperature: temperature,
59
+ stream: Proc.new { |chunk|
60
+ token_count += 1
61
+ Whirly.status = "Received #{token_count} chunks"
62
+ content = chunk.dig("choices", 0, "delta", "content")
63
+ response += content if content
64
+ }
57
65
  }
58
66
  )
67
+
59
68
  Whirly.stop
60
69
  response
61
70
  end
data/lib/compose/cli.rb CHANGED
@@ -8,6 +8,7 @@ module Compose
8
8
  desc 'edit [FILES]', 'Edit files using AI assistance'
9
9
  method_option :model, type: :string, default: 'sonnet35', desc: 'AI model to use'
10
10
  method_option :include_imports, type: :boolean, default: true, aliases: '-a', desc: 'Include import statements when processing files'
11
+ method_option :clear_cache, type: :boolean, default: false, aliases: '-c', desc: 'Clear the cache before processing files'
11
12
  def edit(*files)
12
13
  puts 'Welcome to Compose!'.green
13
14
 
@@ -16,12 +17,21 @@ module Compose
16
17
  exit(1)
17
18
  end
18
19
 
19
- code_model = Model.find(options[:model])
20
- ApiKeyUtils.setup(code_model)
21
- verifier_model = Model.preferred_verifier_model
22
- ApiKeyUtils.setup(verifier_model)
20
+ if options[:clear_cache]
21
+ FileUtils.rm_rf(FileProcessor::CACHE_DIR)
22
+ puts 'Cache cleared'.green
23
+ end
24
+
25
+ # Set up configuration
26
+ Compose::Config.configure do |config|
27
+ config.code_model = Model.find(options[:model])
28
+ config.verifier_model = Model.preferred_verifier_model
29
+ end
30
+
31
+ ApiKeyUtils.setup(Compose::Config.code_model)
32
+ ApiKeyUtils.setup(Compose::Config.verifier_model)
23
33
 
24
- file_processor = FileProcessor.new(files, include_imports: options[:include_imports], model: code_model)
34
+ file_processor = FileProcessor.new(files, include_imports: options[:include_imports])
25
35
  puts "Loaded #{file_processor.files.count} file(s): #{file_processor.files.keys.map { |path| File.exist?(path) ? Pathname.new(path).relative_path_from(Pathname.pwd).to_s : path }.join(', ')}"
26
36
 
27
37
  task = ask('What do you need me to do? (Type \'ask\' followed by your question to ask a question instead):')
@@ -33,7 +43,7 @@ module Compose
33
43
  else
34
44
  edits = file_processor.edit_files(task)
35
45
 
36
- edit_verifier = EditVerifier.new(edits, model: verifier_model)
46
+ edit_verifier = EditVerifier.new(edits)
37
47
  verified_edits = edit_verifier.verify_edits
38
48
 
39
49
  edit_processor = EditProcessor.new(verified_edits)
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
module Compose
  # Process-wide settings holder. The two model accessors live on the module's
  # singleton, so they are shared by every reader of Compose::Config.
  module Config
    class << self
      # verifier_model / code_model: values assigned through .configure (or
      # directly); both are nil until set.
      attr_accessor :verifier_model, :code_model

      # Yields the Config module itself so settings can be assigned in a block.
      #
      # @yieldparam config [Module] Compose::Config
      # @return [Object] the block's value, or Compose::Config when no block
      #   is given (previously a block-less call raised LocalJumpError)
      def configure
        return self unless block_given?

        yield self
      end
    end
  end
end
@@ -2,10 +2,9 @@
2
2
 
3
3
  module Compose
4
4
  class EditVerifier
5
- attr_reader :ai_client, :edits
5
+ attr_reader :edits
6
6
 
7
- def initialize(edits, model:)
8
- @ai_client = AIClient.new(model)
7
+ def initialize(edits)
9
8
  @edits = edits
10
9
  end
11
10
 
@@ -33,7 +32,7 @@ module Compose
33
32
  file_content = FileProcessor.load_file(File.expand_path(edit[:filename]))
34
33
  system_prompt = "CODE:\n#{file_content}\n"
35
34
 
36
- response = ai_client.chat(system_prompt, prompt)
35
+ response = AIClient.new(Compose::Config.verifier_model).chat(system_prompt, prompt)
37
36
 
38
37
  begin
39
38
  verified_change = JSON.parse(response)
@@ -2,9 +2,16 @@
2
2
 
3
3
  require 'find'
4
4
  require 'pathname'
5
+ require 'selenium-webdriver'
6
+ require 'nokogiri'
7
+ require 'reverse_markdown'
8
+ require 'digest/md5'
9
+ require 'fileutils'
5
10
 
6
11
  module Compose
7
12
  class FileProcessor
13
+ CACHE_DIR = File.join(ENV['HOME'], '.cache', 'compose')
14
+
8
15
  attr_reader :ai_client, :files, :tokens
9
16
 
10
17
  def self.load_file(file_path, include_imports: true)
@@ -20,12 +27,13 @@ module Compose
20
27
  "<#{relative_path}>\n#{processed_content}\n</#{relative_path}>"
21
28
  end
22
29
 
23
- def initialize(input_files, include_imports: true, model:)
24
- @ai_client = AIClient.new(model)
30
+ def initialize(input_files, include_imports: true)
25
31
  @files = {}
26
32
 
27
33
  input_files.each do |input|
28
- if File.directory?(input)
34
+ if input.start_with?('http://', 'https://')
35
+ @files[input] = process_url(input)
36
+ elsif File.directory?(input)
29
37
  find_files_in_directory(input).each do |file|
30
38
  @files[File.expand_path(file)] = nil
31
39
  end
@@ -36,8 +44,8 @@ module Compose
36
44
  end
37
45
  end
38
46
 
39
- @files.each do |file, _|
40
- @files[file] = FileProcessor.load_file(file, include_imports: include_imports)
47
+ @files.each do |path, _|
48
+ @files[path] ||= FileProcessor.load_file(path, include_imports: include_imports)
41
49
  end
42
50
 
43
51
  self
@@ -52,7 +60,7 @@ module Compose
52
60
  prompt.gsub!('{{QUESTION}}', question)
53
61
  system_prompt = "CODE:\n#{content}\n"
54
62
 
55
- ai_client.chat(system_prompt, prompt)
63
+ AIClient.new(Compose::Config.code_model).chat(system_prompt, prompt)
56
64
  end
57
65
 
58
66
  def edit_files(task)
@@ -60,7 +68,7 @@ module Compose
60
68
  prompt.gsub!('{{TASK}}', task)
61
69
  system_prompt = "CODE:\n#{content}\n"
62
70
 
63
- response = ai_client.chat(system_prompt, prompt)
71
+ response = AIClient.new(Compose::Config.code_model).chat(system_prompt, prompt)
64
72
 
65
73
  begin
66
74
  edits = JSON.parse(response)['edits']
@@ -97,5 +105,57 @@ module Compose
97
105
  rescue
98
106
  false
99
107
  end
108
+
109
# Returns the markdown for +url+ wrapped in a <url src='...'> tag, reading it
# from the on-disk cache when present and fetching (then caching) it otherwise.
#
# @param url [String] page address; its MD5 hex digest is the cache file name
# @return [String, nil] the wrapped content, or nil when fetch_and_convert_url
#   returns nil (nothing is written to the cache in that case)
def process_url(url)
  cached_file = File.join(CACHE_DIR, Digest::MD5.hexdigest(url))

  if File.exist?(cached_file)
    content = File.read(cached_file)
  else
    content = fetch_and_convert_url(url)
    # NOTE(review): a nil result is handed back to the caller as-is; confirm
    # the caller does not go on to treat the URL as a local file path.
    return if content.nil?

    FileUtils.mkdir_p(File.dirname(cached_file))
    File.write(cached_file, content)
  end

  "<url src='#{url}'>\n#{content}\n</url>"
end
125
+
126
# Loads +url+ in headless Chrome, converts the rendered page to markdown and
# sends that markdown through the markdown_cleanup prompt.
#
# @param url [String] address passed to the Selenium driver
# @return the reply of AIClient#chat for the cleanup prompt (presumably the
#   cleaned markdown String — confirm against AIClient)
def fetch_and_convert_url(url)
  options = Selenium::WebDriver::Chrome::Options.new
  options.add_argument('--headless')
  options.add_argument('--disable-gpu')
  options.add_argument('--no-sandbox')
  options.add_argument('--disable-dev-shm-usage')
  options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")

  driver = Selenium::WebDriver.for :chrome, options: options

  begin
    driver.get(url)
    doc = Nokogiri::HTML(driver.page_source)
    # Scripts and stylesheets are stripped before the markdown conversion.
    doc.search('script, style').remove

    parsed_markdown = ReverseMarkdown.convert(doc.to_html, unknown_tags: :bypass)
    # Collapse runs of spaces into one and trim the ends.
    normalized_markdown = parsed_markdown.gsub(/ +/, ' ').strip

    # Remove content before the first h1 in markdown, when there is one.
    first_h1_index = normalized_markdown.index(/^# /)
    normalized_markdown = normalized_markdown[first_h1_index..-1] if first_h1_index

    template = File.read(File.expand_path("../../config/prompts/markdown_cleanup.txt", __dir__))
    # Block form on purpose: with a String replacement, gsub interprets
    # backslash sequences in the page text (\\, \0, \1, \', ...) as
    # back-references and would corrupt the inserted markdown.
    prompt = template.gsub('{{MARKDOWN}}') { normalized_markdown }

    AIClient.new(Compose::Config.verifier_model).chat("", prompt)
  ensure
    # Always shut the browser down, even when loading or conversion fails.
    driver.quit
  end
end
100
160
  end
101
161
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Compose
4
- VERSION = '0.1.6'
4
+ VERSION = '0.1.8'
5
5
  end
data/lib/compose.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative 'compose/version'
4
+ require_relative 'compose/config'
4
5
  require_relative 'compose/cli'
5
6
  require_relative 'compose/api_key_utils'
6
7
  require_relative 'compose/file_processor'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: compose
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.6
4
+ version: 0.1.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dreaming Tulpa
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-09-11 00:00:00.000000000 Z
11
+ date: 2024-09-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ruby-openai
@@ -164,6 +164,62 @@ dependencies:
164
164
  - - ">="
165
165
  - !ruby/object:Gem::Version
166
166
  version: '0'
167
+ - !ruby/object:Gem::Dependency
168
+ name: selenium-webdriver
169
+ requirement: !ruby/object:Gem::Requirement
170
+ requirements:
171
+ - - ">="
172
+ - !ruby/object:Gem::Version
173
+ version: '0'
174
+ type: :runtime
175
+ prerelease: false
176
+ version_requirements: !ruby/object:Gem::Requirement
177
+ requirements:
178
+ - - ">="
179
+ - !ruby/object:Gem::Version
180
+ version: '0'
181
+ - !ruby/object:Gem::Dependency
182
+ name: nokogiri
183
+ requirement: !ruby/object:Gem::Requirement
184
+ requirements:
185
+ - - ">="
186
+ - !ruby/object:Gem::Version
187
+ version: '0'
188
+ type: :runtime
189
+ prerelease: false
190
+ version_requirements: !ruby/object:Gem::Requirement
191
+ requirements:
192
+ - - ">="
193
+ - !ruby/object:Gem::Version
194
+ version: '0'
195
+ - !ruby/object:Gem::Dependency
196
+ name: reverse_markdown
197
+ requirement: !ruby/object:Gem::Requirement
198
+ requirements:
199
+ - - ">="
200
+ - !ruby/object:Gem::Version
201
+ version: '0'
202
+ type: :runtime
203
+ prerelease: false
204
+ version_requirements: !ruby/object:Gem::Requirement
205
+ requirements:
206
+ - - ">="
207
+ - !ruby/object:Gem::Version
208
+ version: '0'
209
+ - !ruby/object:Gem::Dependency
210
+ name: digest
211
+ requirement: !ruby/object:Gem::Requirement
212
+ requirements:
213
+ - - ">="
214
+ - !ruby/object:Gem::Version
215
+ version: '0'
216
+ type: :runtime
217
+ prerelease: false
218
+ version_requirements: !ruby/object:Gem::Requirement
219
+ requirements:
220
+ - - ">="
221
+ - !ruby/object:Gem::Version
222
+ version: '0'
167
223
  - !ruby/object:Gem::Dependency
168
224
  name: bundler
169
225
  requirement: !ruby/object:Gem::Requirement
@@ -218,12 +274,14 @@ files:
218
274
  - bin/compose
219
275
  - config/models.yml
220
276
  - config/prompts/ask.txt
277
+ - config/prompts/markdown_cleanup.txt
221
278
  - config/prompts/task.txt
222
279
  - config/prompts/verify.txt
223
280
  - lib/compose.rb
224
281
  - lib/compose/ai_client.rb
225
282
  - lib/compose/api_key_utils.rb
226
283
  - lib/compose/cli.rb
284
+ - lib/compose/config.rb
227
285
  - lib/compose/edit_processor.rb
228
286
  - lib/compose/edit_verifier.rb
229
287
  - lib/compose/file_processor.rb