dorian 2.0.2 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4) hide show
  1. checksums.yaml +4 -4
  2. data/VERSION +1 -1
  3. data/lib/dorian/bin.rb +478 -38
  4. metadata +16 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 60015bb962654dc712a54a08aedaf13c7727ebca8ae136c031c4d595c29daff1
4
- data.tar.gz: 3af0aba2481e360390435c7dd9d75d4d2201705b5ea3503c165fb4e82b50d6c7
3
+ metadata.gz: bd057371121d824ad1a6172b189916820b5e5544f9cabd95db4fd7f51a2bd5fa
4
+ data.tar.gz: 244af12257d30dc4c617b8443a4a02d2c8ecc0fb7209f7b27dee7525ee43b35f
5
5
  SHA512:
6
- metadata.gz: 5630497aed0f3498f6e442aceeb47d138334a37049a6b1fbd6fdb61b722257d98a819876e22fbcd01a826ae144f5bda966503179140c8869de3694cd0597bfd6
7
- data.tar.gz: a075cd1be6708d17d6d47733e7cadb8273fe3060ecda00610b8523a2c19cba7e5b84a6b564736fca7a15483394f187d539c422c369c56aaa148f0411836c1982
6
+ metadata.gz: a6e82fef1ff6f595dfecba39e6f14ad19bf502f38b8c4af6d5e261eb69cbd3461119fbbb526089cddea881a00cb26f3b11706b34969485d2e2ad61bce6829c27
7
+ data.tar.gz: a3f12ae1d9d88cb6f28b975f5b59ca11ed37baa612c3e8b6c58009bc639f59084ed1af3cb2f400ed94bfc38ed8a51b7ef0c6b93aa7a1ad8118a52c8e7b7cec41
data/VERSION CHANGED
@@ -1 +1 @@
1
- 2.0.2
1
+ 2.2.0
data/lib/dorian/bin.rb CHANGED
@@ -5,9 +5,12 @@ require "dorian/arguments"
5
5
  require "dorian/eval"
6
6
  require "dorian/progress"
7
7
  require "dorian/to_struct"
8
+ require "git"
8
9
  require "json"
9
- require "yaml"
10
+ require "net/http"
10
11
  require "parallel"
12
+ require "uri"
13
+ require "yaml"
11
14
 
12
15
  class Dorian
13
16
  class Bin
@@ -21,10 +24,12 @@ class Dorian
21
24
  "jsonl" => :jsonl,
22
25
  "raw" => :raw,
23
26
  "yaml" => :yaml,
24
- "yml" => :yaml
27
+ "yml" => :yaml,
28
+ "yamll" => :yamll,
29
+ "ymll" => :yamll
25
30
  }.freeze
26
31
 
27
- attr_reader :parsed, :command, :arguments
32
+ attr_reader :parsed, :command, :arguments, :ruby, :ruby_before, :ruby_after
28
33
 
29
34
  def initialize
30
35
  @parsed =
@@ -47,6 +52,15 @@ class Dorian
47
52
  parallel: {
48
53
  alias: :p
49
54
  },
55
+ parallel_type: {
56
+ alias: :pt,
57
+ type: :string,
58
+ default: :processes
59
+ },
60
+ n: {
61
+ type: :integer,
62
+ default: 100
63
+ },
50
64
  rails: {
51
65
  alias: :r
52
66
  },
@@ -73,7 +87,8 @@ class Dorian
73
87
  progress: :boolean,
74
88
  headers: :boolean,
75
89
  progress_format: {
76
- alias: :pf
90
+ alias: :pf,
91
+ type: :string
77
92
  },
78
93
  pretty: {
79
94
  default: true
@@ -87,7 +102,9 @@ class Dorian
87
102
  }
88
103
  )
89
104
 
90
- @command, *@arguments = parsed.arguments
105
+ @arguments = parsed.arguments
106
+ @command = arguments.first
107
+ @ruby = nil
91
108
  end
92
109
 
93
110
  def self.run(...)
@@ -99,12 +116,71 @@ class Dorian
99
116
  abort VERSION if version?
100
117
 
101
118
  case command&.to_sym
102
- when :read, nil
103
- command_read
104
119
  when :each
120
+ arguments.delete("each")
121
+ @command = :each
122
+ @ruby = arguments.delete_at(0)
105
123
  command_each
124
+ when :all
125
+ arguments.delete("all")
126
+ @command = :all
127
+ @ruby = arguments.delete_at(0)
128
+ command_all
129
+ when :before
130
+ arguments.delete("before")
131
+ @command = :before
132
+ @ruby = arguments.delete_at(0)
133
+ command_before
134
+ when :after
135
+ arguments.delete("after")
136
+ @command = :after
137
+ @ruby = arguments.delete_at(0)
138
+ command_after
139
+ when :between
140
+ arguments.delete("between")
141
+ @command = :between
142
+ @ruby_after = arguments.delete_at(0)
143
+ @ruby_before = arguments.delete_at(0)
144
+ command_between
145
+ when :select
146
+ arguments.delete("select")
147
+ @command = :select
148
+ @ruby = arguments.delete_at(0)
149
+ command_select
150
+ when :reject
151
+ arguments.delete("reject")
152
+ @command = :reject
153
+ @ruby = arguments.delete_at(0)
154
+ command_reject
155
+ when :tally
156
+ arguments.delete("tally")
157
+ @command = :tally
158
+ @ruby = arguments.delete_at(0)
159
+ command_tally
160
+ when :anonymize
161
+ arguments.delete("anonymize")
162
+ @command = :anonymize
163
+ command_anonymize
164
+ when :append
165
+ arguments.delete("append")
166
+ @command = :append
167
+ command_append
168
+ when :prepend
169
+ arguments.delete("prepend")
170
+ @command = :prepend
171
+ command_prepend
172
+ when :chat
173
+ arguments.delete("chat")
174
+ @command = :chat
175
+ command_chat
176
+ when :commit
177
+ arguments.delete("commit")
178
+ @command = :commit
179
+ command_commit
106
180
  else
107
- abort "#{command} not supported"
181
+ arguments.delete("read")
182
+ @command = :read
183
+ command_read
108
184
  end
109
185
  end
110
186
 
@@ -112,19 +188,154 @@ class Dorian
112
188
  parsed.files
113
189
  end
114
190
 
191
+ def command_chat
192
+ puts completion(
193
+ token: token(".chat"),
194
+ model: "gpt-4o",
195
+ messages: [{ role: :user, content: everything.join("\n") }]
196
+ )
197
+ end
198
+
199
+ def command_commit
200
+ system_prompt = "simple, clear, short, lowercase commit message"
201
+ prompt_1 = "for the following diff:"
202
+ prompt_2 = "for the following git status:"
203
+ prompt_3 = "for the following comment:"
204
+
205
+ content_1 = short(`git diff --staged`)
206
+ content_2 = short(`git status`)
207
+ content_3 = short(arguments.join("\n"))
208
+
209
+ abort "no staged files" if content_1.empty?
210
+
211
+ messages = [
212
+ { role: :system, content: system_prompt },
213
+ { role: :system, content: prompt_1 },
214
+ { role: :user, content: content_1 },
215
+ { role: :system, content: prompt_2 },
216
+ { role: :user, content: content_2 },
217
+ { role: :system, content: prompt_3 },
218
+ { role: :user, content: content_3 }
219
+ ]
220
+
221
+ message = completion(token: token(".commit"), model: "gpt-4o", messages: messages)
222
+
223
+ Git.open(".").commit(message)
224
+
225
+ puts message
226
+ end
227
+
115
228
  def command_read
116
229
  each(stdin_files + files) do |input|
117
230
  outputs(reads(File.read(input)), file: input)
118
231
  end
119
232
 
120
- each(stdin_arguments) { |input| outputs(reads(input)) }
233
+ each(stdin_arguments + arguments) { |input| outputs(reads(input)) }
234
+ end
235
+
236
+ def everything
237
+ read_stdin_files + stdin_arguments + read_files + arguments
121
238
  end
122
239
 
123
240
  def command_each
124
- each(read_stdin_files + read_files + stdin_arguments) do |input|
125
- each(lines(reads(input))) do |line|
126
- evaluates(arguments.join(" "), it: line)
127
- end
241
+ each(everything) do |input|
242
+ each(lines(reads(input)), progress: true) { |line| evaluates(it: line) }
243
+ end
244
+ end
245
+
246
+ def command_tally
247
+ each(everything) do |input|
248
+ outputs(
249
+ JSON.pretty_generate(
250
+ map(lines(reads(input)), progress: true) do |element|
251
+ if ruby.to_s.empty?
252
+ element
253
+ else
254
+ evaluates(it: element, returns: true, stdout: false).returned
255
+ end
256
+ end.tally
257
+ )
258
+ )
259
+ end
260
+ end
261
+
262
+ def command_all
263
+ each(everything, progress: true) { |input| evaluates(it: reads(input)) }
264
+ end
265
+
266
+ def command_append
267
+ outputs(everything.map { |input| lines(reads(input)) }.inject(&:+))
268
+ end
269
+
270
+ def command_prepend
271
+ outputs(
272
+ everything.reverse.map { |input| lines(reads(input)) }.inject(&:+)
273
+ )
274
+ end
275
+
276
+ def command_select
277
+ each(stdin_files + files) do |input|
278
+ outputs(
279
+ select(lines(reads(File.read(input)))) { |element| match?(element) },
280
+ file: input
281
+ )
282
+ end
283
+
284
+ each(stdin_arguments + arguments) do |input|
285
+ outputs(select(lines(reads(input))) { |element| match?(element) })
286
+ end
287
+ end
288
+
289
+ def command_reject
290
+ each(stdin_files + files) do |input|
291
+ outputs(
292
+ reject(lines(reads(File.read(input)))) { |element| match?(element) },
293
+ file: input
294
+ )
295
+ end
296
+
297
+ each(stdin_arguments + arguments) do |input|
298
+ outputs(reject(lines(reads(input))) { |element| match?(element) })
299
+ end
300
+ end
301
+
302
+ def command_after
303
+ each(stdin_files + files) do |input|
304
+ outputs(after(lines(reads(File.read(input)))), file: input)
305
+ end
306
+
307
+ each(stdin_arguments + arguments) do |input|
308
+ outputs(after(lines(reads(input))))
309
+ end
310
+ end
311
+
312
+ def command_before
313
+ each(stdin_files + files) do |input|
314
+ outputs(before(reads(File.read(input))), file: input)
315
+ end
316
+
317
+ each(stdin_arguments + arguments) do |input|
318
+ outputs(before(lines(reads(input))))
319
+ end
320
+ end
321
+
322
+ def command_between
323
+ each(stdin_files + files) do |input|
324
+ outputs(between(lines(reads(File.read(input)))), file: input)
325
+ end
326
+
327
+ each(stdin_arguments + arguments) do |input|
328
+ outputs(between(lines(reads(input))))
329
+ end
330
+ end
331
+
332
+ def command_anonymize
333
+ each(stdin_files + files) do |input|
334
+ outputs(anonymize(reads(File.read(input))), file: input)
335
+ end
336
+
337
+ each(stdin_arguments + arguments) do |input|
338
+ outputs(anonymize(reads(input)))
128
339
  end
129
340
  end
130
341
 
@@ -141,25 +352,22 @@ class Dorian
141
352
 
142
353
  case output
143
354
  when :csv
144
- CSV.generate(headers: headers_of(content)) do |csv|
145
- csv << headers_of(content) if headers_of(content)
146
-
147
- each(content) { |row| csv << row }
148
- end
355
+ (headers_of(content) ? headers_of(content).to_csv : "") +
356
+ map(content) do |element|
357
+ CSV.generate(headers: headers_of(content)) do |csv|
358
+ csv << wrap(element)
359
+ end
360
+ end.join
149
361
  when :json
150
362
  pretty? ? JSON.pretty_generate(content) : content.to_json
151
- when :jsonl
363
+ when :jsonl, :yamll
152
364
  map(content, &:to_json).join("\n")
153
365
  when :raw
154
366
  content
155
- when :ruby
156
- content.inspect
157
- when :rubyl
158
- map(content, &:inspect).join("\n")
159
367
  when :yaml
160
368
  content.to_yaml
161
369
  else
162
- abort "#{output} not supported"
370
+ abort "#{output.inspect} not supported"
163
371
  end
164
372
  end
165
373
 
@@ -173,15 +381,19 @@ class Dorian
173
381
  end
174
382
  when :json
175
383
  JSON.parse(content).to_deep_struct
176
- when :jsonl
384
+ when :jsonl, :yamll
177
385
  map(content.lines) { |line| JSON.parse(line) }.to_deep_struct
178
386
  when :raw
179
387
  content
180
388
  when :yaml
181
- YAML.safe_load(content)
389
+ YAML.safe_load(content).to_deep_struct
182
390
  else
183
- abort "#{input} not supported"
391
+ abort "#{input.inspect} not supported"
184
392
  end
393
+ rescue JSON::ParserError => e
394
+ abort "invalid json: #{e.message}"
395
+ rescue Psych::SyntaxError => e
396
+ abort "invalid yaml: #{e.message}"
185
397
  end
186
398
 
187
399
  def pretty?
@@ -190,13 +402,15 @@ class Dorian
190
402
 
191
403
  def read_stdin
192
404
  @read_stdin ||= $stdin.each_line.to_a
405
+ rescue Interrupt
406
+ abort "interupt in read_stdin"
193
407
  end
194
408
 
195
409
  def stdin_files
196
- return [] if files.any?
197
- return [] unless stdin == :files
410
+ return [] if files.any? || arguments.any?
411
+ return [] if stdin != :files
198
412
 
199
- read_stdin.map(&:rstrip)
413
+ map(read_stdin, &:rstrip)
200
414
  end
201
415
 
202
416
  def read_stdin_files
@@ -208,7 +422,7 @@ class Dorian
208
422
  end
209
423
 
210
424
  def stdin_arguments
211
- return [] if files.any?
425
+ return [] if files.any? || arguments.any?
212
426
  return [] if stdin == :files
213
427
 
214
428
  [read_stdin.join]
@@ -218,6 +432,10 @@ class Dorian
218
432
  options.stdin.to_sym
219
433
  end
220
434
 
435
+ def deep?
436
+ !!options.deep
437
+ end
438
+
221
439
  def options
222
440
  parsed.options
223
441
  end
@@ -294,30 +512,252 @@ class Dorian
294
512
  parsed.help
295
513
  end
296
514
 
515
+ def n
516
+ options.n
517
+ end
518
+
519
+ def parallel_type
520
+ options.parallel_type&.to_sym
521
+ end
522
+
297
523
  def headers_of(content)
298
- return unless headers?
299
524
  return unless content.respond_to?(:first)
300
525
  return unless content.first
301
526
  return unless content.first.respond_to?(:to_h)
302
527
  return unless content.first.to_h.keys.any?
303
528
 
304
529
  content.first.to_h.keys
530
+ rescue TypeError
531
+ nil
305
532
  end
306
533
 
307
- def each(collection, &)
308
- parallel? ? Parallel.each(collection, &) : collection.each(&)
534
+ def parallel_options
535
+ if parallel_type == :processes
536
+ { in_processes: n }
537
+ elsif parallel_type == :threads
538
+ { in_threads: n }
539
+ else
540
+ abort "#{parallel_type.inspect} not supported"
541
+ end
309
542
  end
310
543
 
311
- def map(collection, &)
312
- parallel? ? Parallel.map(collection, &) : collection.map(&)
544
+ def each(collection, options: parallel_options, progress: false, &)
545
+ collection = wrap(collection)
546
+ progress_bar = progress ? create_progress_bar(collection.size) : nil
547
+
548
+ if parallel?
549
+ Parallel.each(
550
+ collection,
551
+ **options,
552
+ finish: ->(*) { progress_bar&.increment },
553
+ &
554
+ )
555
+ else
556
+ collection.each do |element|
557
+ yield(element).tap { progress_bar&.increment }
558
+ end
559
+ end
560
+ end
561
+
562
+ def map(collection, options: parallel_options, progress: false, &)
563
+ collection = wrap(collection)
564
+ progress_bar = progress ? create_progress_bar(collection.size) : nil
565
+
566
+ if parallel?
567
+ Parallel.map(
568
+ collection,
569
+ **options,
570
+ finish: ->(*) { progress_bar&.increment },
571
+ &
572
+ )
573
+ else
574
+ collection.map do |element|
575
+ yield(element).tap { progress_bar&.increment }
576
+ end
577
+ end
578
+ end
579
+
580
+ def select(collection, progress: false, &)
581
+ collection = wrap(collection)
582
+ progress_bar = progress ? create_progress_bar(collection.size) : nil
583
+
584
+ collection.select do |element|
585
+ yield(element).tap { progress_bar&.increment }
586
+ end
587
+ end
588
+
589
+ def reject(collection, progress: false, &)
590
+ collection = wrap(collection)
591
+ progress_bar = progress ? create_progress_bar(collection.size) : nil
592
+
593
+ collection.reject do |element|
594
+ yield(element).tap { progress_bar&.increment }
595
+ end
313
596
  end
314
597
 
315
598
  def lines(input)
316
- input.is_a?(String) ? input.lines.map(&:rstrip) : Array(input)
599
+ if input.is_a?(String)
600
+ input.lines.map(&:rstrip)
601
+ elsif deep?
602
+ deep_lines(input)
603
+ else
604
+ Array(input)
605
+ end
606
+ end
607
+
608
+ def deep_lines(input)
609
+ case input
610
+ when Array
611
+ [input.to_deep_struct] +
612
+ input.flat_map { |element| deep_lines(element) }
613
+ when Hash
614
+ [input.to_deep_struct] +
615
+ input.flat_map { |key, value| deep_lines([key, value]) }
616
+ when Struct
617
+ deep_lines(input.from_deep_struct).to_deep_struct
618
+ else
619
+ [input.to_deep_struct]
620
+ end
621
+ end
622
+
623
+ def wrap(ruby)
624
+ if ruby.is_a?(Hash)
625
+ ruby
626
+ elsif ruby.respond_to?(:to_a)
627
+ ruby.to_a
628
+ else
629
+ Array(ruby)
630
+ end
631
+ end
632
+
633
+ def create_progress_bar(total)
634
+ return unless progress?
635
+
636
+ Dorian::Progress.create(total:, format: progress_format)
637
+ end
638
+
639
+ def after(input, ruby: @ruby_after || @ruby)
640
+ if ruby.to_i.to_s == ruby
641
+ input[(ruby.to_i)..]
642
+ else
643
+ selected = false
644
+
645
+ input.select do |element|
646
+ selected = true if match?(element, ruby:)
647
+ selected
648
+ end
649
+ end
650
+ end
651
+
652
+ def before(input, ruby: @ruby_before || @ruby)
653
+ if ruby.to_i.to_s == ruby
654
+ input[..(ruby.to_i)]
655
+ else
656
+ selected = true
657
+
658
+ input.select do |element|
659
+ selected.tap { selected = false if match?(element, ruby:) }
660
+ end
661
+ end
662
+ end
663
+
664
+ def between(input, ruby_before: @ruby_before, ruby_after: @ruby_after)
665
+ if ruby_before.to_i.to_s == ruby_before &&
666
+ ruby_after.to_i.to_s == ruby_after
667
+ input[(ruby_after.to_i)..(ruby_before.to_i)]
668
+ else
669
+ selected = false
670
+
671
+ input.select do |element|
672
+ selected = true if match?(element, ruby: ruby_after)
673
+ selected.tap do
674
+ selected = false if match?(element, ruby: ruby_before)
675
+ end
676
+ end
677
+ end
678
+ end
679
+
680
+ def anonymize(input)
681
+ if input.is_a?(String)
682
+ input.gsub(/[a-z]/, "a").gsub(/[A-Z]/, "A").gsub(/[0-9]/, "0")
683
+ elsif input.is_a?(Integer)
684
+ 0
685
+ elsif input.is_a?(Float)
686
+ 0.0
687
+ elsif input.is_a?(TrueClass) || input.is_a?(FalseClass)
688
+ false
689
+ elsif input.nil?
690
+ nil
691
+ elsif input.is_a?(Hash)
692
+ input.transform_values { |value| anonymize(value) }
693
+ elsif input.is_a?(Array)
694
+ input.map { |element| anonymize(element) }
695
+ elsif input.is_a?(Struct)
696
+ anonymize(input.from_deep_struct).to_deep_struct
697
+ else
698
+ raise "#{input.class.inspect} not supported"
699
+ end
700
+ end
701
+
702
+ def match?(element, ruby: @ruby)
703
+ !!evaluates(ruby:, it: element, stdout: false, returns: true).returned
704
+ end
705
+
706
+ def token(file)
707
+ token_file = File.join(Dir.home, file)
708
+
709
+ if File.exist?(token_file)
710
+ token = File.read(token_file).strip
711
+ else
712
+ print "token: "
713
+ token = gets.strip
714
+ File.write(token_file, token)
715
+ puts "token written to #{token_file}"
716
+ end
717
+
718
+ token
719
+ end
720
+
721
+ def completion(token:, model:, messages:)
722
+ body =
723
+ post(
724
+ "https://api.openai.com/v1/chat/completions",
725
+ headers: {
726
+ "Content-Type" => "application/json",
727
+ "Authorization" => "Bearer #{token}"
728
+ },
729
+ body: { model:, messages: }.to_json
730
+ )
731
+
732
+ json = JSON.parse(body)
733
+ output = json.dig("choices", 0, "message", "content")
734
+
735
+ if output
736
+ output.strip
737
+ else
738
+ abort JSON.pretty_generate(json)
739
+ end
740
+ end
741
+
742
+ def post(url, headers: {}, body: {})
743
+ uri = URI.parse(url)
744
+ http = Net::HTTP.new(uri.host, uri.port)
745
+ http.use_ssl = true
746
+ request = Net::HTTP::Post.new(uri.path, headers)
747
+ request.body = body
748
+ http.request(request).body
749
+ end
750
+
751
+ def short(string)
752
+ string[0..5000]
753
+ end
754
+
755
+ def encoder
756
+ Tiktoken.encoding_for_model("gpt-4o")
317
757
  end
318
758
 
319
759
  def evaluates(
320
- ruby,
760
+ ruby: @ruby,
321
761
  it: nil,
322
762
  debug: debug?,
323
763
  stdout: stdout?,
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dorian
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.2
4
+ version: 2.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dorian Marié
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-08-31 00:00:00.000000000 Z
11
+ date: 2024-09-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: csv
@@ -80,6 +80,20 @@ dependencies:
80
80
  - - ">="
81
81
  - !ruby/object:Gem::Version
82
82
  version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: git
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
83
97
  - !ruby/object:Gem::Dependency
84
98
  name: json
85
99
  requirement: !ruby/object:Gem::Requirement