compose 0.1.7 → 0.1.8

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3ac262b371cf1ff49a50e94ea83aaf75e7867564341c3bd3bff024c77831c2b4
4
- data.tar.gz: 8577e99ba4dc26f7cd96a40589bb4c75cca3394e6e3302d8d43c9786df4633dd
3
+ metadata.gz: 050edb82f3b985eefdea7e2b791019968f7e8f13770a07652e12e0f273e159db
4
+ data.tar.gz: 8bce91cfa68da8141cca2ce29a8e9ab25be1051e5d72b93c6376a77bd84f222d
5
5
  SHA512:
6
- metadata.gz: 5db9243359a8045ca48620bb0fb9f5dc0876fe763c01f0e3fba2cf46467b1feffc42d69afd35857aa1398b0977a8d0478fef6a338c847b0ef43651a934b386f3
7
- data.tar.gz: 3e4de964ef36267bf768eba57c09d58e7037ae17e7313e61b068bb4611bb688ef914757b805a20d3ba7e4cc2a07f156d31bcb6270a0aebbca29a7708e2aa08a1
6
+ metadata.gz: 9e07206841afc24ee8daa040c5f11183198072f0e98913da0c5a1ac3d4605b14daf2ee130f93443eeb2f3fc6e7191021ccb1114210471d62353c5011de1fd8de
7
+ data.tar.gz: e0406eb23a9d5368cd9bc527237c152308716839afe9e649f3837434988dc07f246bbfd06281cbbfc0b62da7b98d2e3944af1783a181487b5c5c10935a967013
@@ -0,0 +1,30 @@
1
+ You are tasked with cleaning up a markdown file that was converted from HTML. The file contains unnecessary elements that need to be removed to isolate the main content of the page. Here is the markdown content to clean:
2
+
3
+ <markdown>
4
+ {{MARKDOWN}}
5
+ </markdown>
6
+
7
+ Your goal is to remove all artifacts and unnecessary elements from the markdown, leaving only the main content of the page.
8
+
9
+ Common artifacts and unnecessary elements to look for and remove include:
10
+
11
+ 1. Header and footer sections
12
+ 2. Navigation menus
13
+ 3. Sidebar content
14
+ 4. Social media sharing buttons
15
+ 5. Advertisement blocks
16
+ 6. Copyright notices
17
+
18
+ Follow these steps to clean the markdown content:
19
+
20
+ 1. Identify the main content section of the page. This is typically the largest block of text that contains the primary information.
21
+ 2. If there is no H1 heading, try to identify the main title or heading of the page and format it as an H1 heading.
22
+ 3. Remove any content before the main content section, including headers, navigation menus, and introductory elements.
23
+ 4. Remove any content after the main content section, including footers, related links, and closing elements.
24
+ 5. Ensure that headings are properly formatted with the correct number of `#` symbols.
25
+ 6. Preserve any important links, image references, captions, lists, tables,
26
+ code blocks etc. that are part of the main content.
27
+
28
+ Be careful not to remove any part of the main content while cleaning up the markdown.
29
+
30
+ Once you have cleaned the markdown content, provide only the cleaned-up markdown. Ensure that only the main content of the page remains, formatted correctly in markdown. Do not surround the response with backticks.
@@ -17,7 +17,7 @@ module Compose
17
17
  response.dig('content', 0, 'text')
18
18
  when 'openai'
19
19
  response = openai_chat(system_prompt, user_prompt, temperature: temperature)
20
- response.dig('choices', 0, 'message', 'content')
20
+ response
21
21
  else
22
22
  raise "Unsupported model: #{@model[:name]}"
23
23
  end
@@ -44,18 +44,27 @@ module Compose
44
44
  end
45
45
 
46
46
  def openai_chat(system_prompt, user_prompt, temperature: 0)
47
- Whirly.start spinner: 'dots', status: 'Verifying change'
47
+ Whirly.start spinner: 'dots', status: 'Received 0 chunks'
48
+ token_count = 0
49
+ response = ""
48
50
  client = OpenAI::Client.new(access_token: ENV['OPENAI_API_KEY'])
49
- response = client.chat(
51
+ client.chat(
50
52
  parameters: {
51
53
  model: @model[:name],
52
54
  messages: [
53
55
  { role: 'system', content: system_prompt },
54
56
  { role: 'user', content: user_prompt }
55
57
  ],
56
- temperature: temperature
58
+ temperature: temperature,
59
+ stream: Proc.new { |chunk|
60
+ token_count += 1
61
+ Whirly.status = "Received #{token_count} chunks"
62
+ content = chunk.dig("choices", 0, "delta", "content")
63
+ response += content if content
64
+ }
57
65
  }
58
66
  )
67
+
59
68
  Whirly.stop
60
69
  response
61
70
  end
data/lib/compose/cli.rb CHANGED
@@ -8,6 +8,7 @@ module Compose
8
8
  desc 'edit [FILES]', 'Edit files using AI assistance'
9
9
  method_option :model, type: :string, default: 'sonnet35', desc: 'AI model to use'
10
10
  method_option :include_imports, type: :boolean, default: true, aliases: '-a', desc: 'Include import statements when processing files'
11
+ method_option :clear_cache, type: :boolean, default: false, aliases: '-c', desc: 'Clear the cache before processing files'
11
12
  def edit(*files)
12
13
  puts 'Welcome to Compose!'.green
13
14
 
@@ -16,12 +17,21 @@ module Compose
16
17
  exit(1)
17
18
  end
18
19
 
19
- code_model = Model.find(options[:model])
20
- ApiKeyUtils.setup(code_model)
21
- verifier_model = Model.preferred_verifier_model
22
- ApiKeyUtils.setup(verifier_model)
20
+ if options[:clear_cache]
21
+ FileUtils.rm_rf(FileProcessor::CACHE_DIR)
22
+ puts 'Cache cleared'.green
23
+ end
24
+
25
+ # Set up configuration
26
+ Compose::Config.configure do |config|
27
+ config.code_model = Model.find(options[:model])
28
+ config.verifier_model = Model.preferred_verifier_model
29
+ end
30
+
31
+ ApiKeyUtils.setup(Compose::Config.code_model)
32
+ ApiKeyUtils.setup(Compose::Config.verifier_model)
23
33
 
24
- file_processor = FileProcessor.new(files, include_imports: options[:include_imports], model: code_model)
34
+ file_processor = FileProcessor.new(files, include_imports: options[:include_imports])
25
35
  puts "Loaded #{file_processor.files.count} file(s): #{file_processor.files.keys.map { |path| File.exist?(path) ? Pathname.new(path).relative_path_from(Pathname.pwd).to_s : path }.join(', ')}"
26
36
 
27
37
  task = ask('What do you need me to do? (Type \'ask\' followed by your question to ask a question instead):')
@@ -33,7 +43,7 @@ module Compose
33
43
  else
34
44
  edits = file_processor.edit_files(task)
35
45
 
36
- edit_verifier = EditVerifier.new(edits, model: verifier_model)
46
+ edit_verifier = EditVerifier.new(edits)
37
47
  verified_edits = edit_verifier.verify_edits
38
48
 
39
49
  edit_processor = EditProcessor.new(verified_edits)
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Compose
4
+ module Config
5
+ class << self
6
+ attr_accessor :verifier_model, :code_model
7
+
8
+ def configure
9
+ yield self
10
+ end
11
+ end
12
+ end
13
+ end
@@ -2,10 +2,9 @@
2
2
 
3
3
  module Compose
4
4
  class EditVerifier
5
- attr_reader :ai_client, :edits
5
+ attr_reader :edits
6
6
 
7
- def initialize(edits, model:)
8
- @ai_client = AIClient.new(model)
7
+ def initialize(edits)
9
8
  @edits = edits
10
9
  end
11
10
 
@@ -33,7 +32,7 @@ module Compose
33
32
  file_content = FileProcessor.load_file(File.expand_path(edit[:filename]))
34
33
  system_prompt = "CODE:\n#{file_content}\n"
35
34
 
36
- response = ai_client.chat(system_prompt, prompt)
35
+ response = AIClient.new(Compose::Config.verifier_model).chat(system_prompt, prompt)
37
36
 
38
37
  begin
39
38
  verified_change = JSON.parse(response)
@@ -2,7 +2,7 @@
2
2
 
3
3
  require 'find'
4
4
  require 'pathname'
5
- require 'httparty'
5
+ require 'selenium-webdriver'
6
6
  require 'nokogiri'
7
7
  require 'reverse_markdown'
8
8
  require 'digest/md5'
@@ -10,6 +10,8 @@ require 'fileutils'
10
10
 
11
11
  module Compose
12
12
  class FileProcessor
13
+ CACHE_DIR = File.join(ENV['HOME'], '.cache', 'compose')
14
+
13
15
  attr_reader :ai_client, :files, :tokens
14
16
 
15
17
  def self.load_file(file_path, include_imports: true)
@@ -25,8 +27,7 @@ module Compose
25
27
  "<#{relative_path}>\n#{processed_content}\n</#{relative_path}>"
26
28
  end
27
29
 
28
- def initialize(input_files, include_imports: true, model:)
29
- @ai_client = AIClient.new(model)
30
+ def initialize(input_files, include_imports: true)
30
31
  @files = {}
31
32
 
32
33
  input_files.each do |input|
@@ -34,15 +35,19 @@ module Compose
34
35
  @files[input] = process_url(input)
35
36
  elsif File.directory?(input)
36
37
  find_files_in_directory(input).each do |file|
37
- @files[File.expand_path(file)] = FileProcessor.load_file(file, include_imports: include_imports)
38
+ @files[File.expand_path(file)] = nil
38
39
  end
39
40
  elsif File.file?(input)
40
- @files[File.expand_path(input)] = FileProcessor.load_file(file, include_imports: include_imports)
41
+ @files[File.expand_path(input)] = nil
41
42
  else
42
43
  puts "Skipping invalid input: #{input}".yellow
43
44
  end
44
45
  end
45
46
 
47
+ @files.each do |path, _|
48
+ @files[path] ||= FileProcessor.load_file(path, include_imports: include_imports)
49
+ end
50
+
46
51
  self
47
52
  end
48
53
 
@@ -55,7 +60,7 @@ module Compose
55
60
  prompt.gsub!('{{QUESTION}}', question)
56
61
  system_prompt = "CODE:\n#{content}\n"
57
62
 
58
- ai_client.chat(system_prompt, prompt)
63
+ AIClient.new(Compose::Config.code_model).chat(system_prompt, prompt)
59
64
  end
60
65
 
61
66
  def edit_files(task)
@@ -63,7 +68,7 @@ module Compose
63
68
  prompt.gsub!('{{TASK}}', task)
64
69
  system_prompt = "CODE:\n#{content}\n"
65
70
 
66
- response = ai_client.chat(system_prompt, prompt)
71
+ response = AIClient.new(Compose::Config.code_model).chat(system_prompt, prompt)
67
72
 
68
73
  begin
69
74
  edits = JSON.parse(response)['edits']
@@ -102,7 +107,7 @@ module Compose
102
107
  end
103
108
 
104
109
  def process_url(url)
105
- cached_file = File.join(ENV['HOME'], '.compose', 'cache', Digest::MD5.hexdigest(url))
110
+ cached_file = File.join(CACHE_DIR, Digest::MD5.hexdigest(url))
106
111
  content = if File.exist?(cached_file)
107
112
  File.read(cached_file)
108
113
  else
@@ -119,14 +124,38 @@ module Compose
119
124
  end
120
125
 
121
126
  def fetch_and_convert_url(url)
122
- response = HTTParty.get(url, follow_redirects: true)
123
- return nil if response.code != 200
124
-
125
- doc = Nokogiri::HTML(response.body)
126
- doc.search('script, style').remove
127
- parsed_markdown = ReverseMarkdown.convert(doc.to_html, unknown_tags: :bypass)
128
- normalized_markdown = parsed_markdown.gsub(/\s+/, ' ').strip
129
- normalized_markdown
127
+ options = Selenium::WebDriver::Chrome::Options.new
128
+ options.add_argument('--headless')
129
+ options.add_argument('--disable-gpu')
130
+ options.add_argument('--no-sandbox')
131
+ options.add_argument('--disable-dev-shm-usage')
132
+ options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")
133
+
134
+ driver = Selenium::WebDriver.for :chrome, options: options
135
+
136
+ begin
137
+ driver.get(url)
138
+ html_content = driver.page_source
139
+ doc = Nokogiri::HTML(html_content)
140
+ doc.search('script, style').remove
141
+
142
+ parsed_markdown = ReverseMarkdown.convert(doc.to_html, unknown_tags: :bypass)
143
+ normalized_markdown = parsed_markdown.gsub(/ +/, ' ').strip
144
+
145
+ # Remove content before the first h1 in markdown
146
+ first_h1_index = normalized_markdown.index(/^# /)
147
+ if first_h1_index
148
+ normalized_markdown = normalized_markdown[first_h1_index..-1]
149
+ end
150
+
151
+ prompt = File.read(File.expand_path("../../config/prompts/markdown_cleanup.txt", __dir__))
152
+ prompt.gsub!('{{MARKDOWN}}', normalized_markdown)
153
+ cleaned_markdown = AIClient.new(Compose::Config.verifier_model).chat("", prompt)
154
+
155
+ cleaned_markdown
156
+ ensure
157
+ driver.quit
158
+ end
130
159
  end
131
160
  end
132
161
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Compose
4
- VERSION = '0.1.7'
4
+ VERSION = '0.1.8'
5
5
  end
data/lib/compose.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative 'compose/version'
4
+ require_relative 'compose/config'
4
5
  require_relative 'compose/cli'
5
6
  require_relative 'compose/api_key_utils'
6
7
  require_relative 'compose/file_processor'
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: compose
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.7
4
+ version: 0.1.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dreaming Tulpa
@@ -165,7 +165,7 @@ dependencies:
165
165
  - !ruby/object:Gem::Version
166
166
  version: '0'
167
167
  - !ruby/object:Gem::Dependency
168
- name: httparty
168
+ name: selenium-webdriver
169
169
  requirement: !ruby/object:Gem::Requirement
170
170
  requirements:
171
171
  - - ">="
@@ -274,12 +274,14 @@ files:
274
274
  - bin/compose
275
275
  - config/models.yml
276
276
  - config/prompts/ask.txt
277
+ - config/prompts/markdown_cleanup.txt
277
278
  - config/prompts/task.txt
278
279
  - config/prompts/verify.txt
279
280
  - lib/compose.rb
280
281
  - lib/compose/ai_client.rb
281
282
  - lib/compose/api_key_utils.rb
282
283
  - lib/compose/cli.rb
284
+ - lib/compose/config.rb
283
285
  - lib/compose/edit_processor.rb
284
286
  - lib/compose/edit_verifier.rb
285
287
  - lib/compose/file_processor.rb