index_me 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: ff3174a890bb2908760bab10bdff5ffa54c640899e8af4daa58510245f8870f2
4
+ data.tar.gz: 3669ac2ae335d89a9c5a638b5f1f67c863a84728187c66046a382d108bfc6af6
5
+ SHA512:
6
+ metadata.gz: '02956ed5ef0d04613999b64c2e12f5afa64da39de58cca4132bc1082eb0f4dd35223c7d8bfa1ec1fcee476fa972dd6123e0721bf814eec58c22c02bf4fc66b06'
7
+ data.tar.gz: '08c1310f6be862402bf93e2e68e47d63eac8b311e43815060f1dcc1d4a19f36a444b756bc9b100e18270d36ace15734bcd78e4afeda231aeb76459d3f3eff7c4'
data/bin/index_me ADDED
@@ -0,0 +1,7 @@
1
+ #!/usr/bin/env ruby
2
require_relative '../lib/index_me'

# Refuse to be pulled in as a library. The comparison is made on basenames
# because an installed gem is started through a RubyGems-generated wrapper that
# `load`s this file: there `$PROGRAM_NAME` is the wrapper's path, not
# `__FILE__`, so a full-path comparison would reject every regular invocation.
raise "Do not require, I am a script" unless File.basename($PROGRAM_NAME) == File.basename(__FILE__)

# ARGV is duplicated because the option parser consumes its argument list.
IndexMe.new(ARGV.dup).run
7
+ # SPDX-License-Identifier: AGPL-3.0-or-later
data/bin/test1.rb ADDED
@@ -0,0 +1,5 @@
1
# Scratch helper: prints the `inspect` form of every line read by Kernel#gets.
# The loop ends at end of input; `gets` returns nil there, and an unconditional
# `loop` would keep printing `nil` forever.
while (x = gets)
  p x
end
5
+
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
class IndexMe
  # Mixin providing conditional diagnostics on standard error.
  # The including object switches it on by setting `@debug` to a truthy value.
  module Debug
    # Writes +lines+ (nested arrays are flattened) to $stderr and then calls the
    # optional block, but only when `@debug` is set or +always+ is true.
    #
    # @param lines [Array<String, Array>] messages to print, one per line
    # @param always [Boolean] print even when `@debug` is not set
    # @return [Object, nil] the block's value when it ran, otherwise nil
    def debug(*lines, always: false, &blk)
      if @debug || always
        $stderr.puts(*lines.flatten)
        blk&.call
      end
    end
  end
end
15
+ # SPDX-License-Identifier: AGPL-3.0-or-later
@@ -0,0 +1,80 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'ostruct'
4
+ require_relative 'debug'
5
+
6
class IndexMe
  # Raised when the data file does not follow the resource/tags layout.
  DataError = Class.new(RuntimeError)

  # Builds the tag index from a data file.
  #
  # The file is consumed as records of two lines each: a resource line, which
  # must contain a `/`, followed by a line of comma-separated tags made of
  # lower-case letters and dashes. Blank lines in front of a resource line are
  # skipped; a blank line in place of the tags line is still an error.
  class Indexer
    include Debug

    attr_reader :current, :file, :stream

    EMPTY    = %r{\A \s* \z}x
    RESOURCE = %r{ \/ }x
    TAG      = %r{\A [[:lower:]][-[:lower:]]* \z}x

    # Reads the whole stream and returns the populated index.
    #
    # @return [OpenStruct] with `resources` and `tags` (Sets), their counts, and
    #   `relations` (tag => OpenStruct with `count` and `resources`)
    # @raise [DataError] on malformed, duplicated or incomplete records
    def index
      # Kernel#loop ends quietly on the StopIteration that `stream.next` raises
      # once the input is exhausted while looking for the next resource line.
      loop do
        index_resource
        index_tags
      end

      data.resource_count = data.resources.size
      data.tag_count = data.tags.size
      data
    end

    private

    # @param file [String] path of the data file
    # @param debug [Boolean] enables Debug#debug output
    def initialize(file, debug:)
      @debug = debug
      @file = file
      # File.foreach never treats a leading `|` in the path as a command to run.
      # Each element is a [stripped_line, zero_based_index] pair.
      @stream = File.foreach(file, chomp: true).lazy.map(&:strip).with_index
    end

    # Splits a tags line and registers every tag for the current resource.
    def add_tags(tags, lnb)
      tags = tags.strip.split(",")
      raise DataError, "no tags defined for resource #{@current} in line #{lnb.succ}" if tags.empty?

      tags.each { |tag| add_tag(tag, lnb) }
    end

    # Validates the tag syntax before registering it.
    def add_tag(tag, lnb)
      unless TAG.match?(tag)
        # `lnb` is zero-based; report the one-based number like every other message.
        raise DataError,
              "illegal tag format in line #{lnb.succ} tag: #{tag.inspect}, need only lower case and dash"
      end

      do_add_tag(tag, lnb)
    end

    # Records the tag and links it to the current resource.
    def do_add_tag(tag, lnb)
      data.tags << tag
      entry = (data.relations[tag] ||= OpenStruct.new(count: 0, resources: Set.new))
      if entry.resources.member?(@current)
        raise DataError,
              "multiple specification of tag #{tag.inspect} for resource #{@current.inspect} in line #{lnb.succ}"
      end

      entry.resources << @current
      entry.count = entry.count.succ
    end

    # Lazily created container for everything collected so far.
    def data
      @__data__ ||= OpenStruct.new(
        resource_count: 0, resources: Set.new, tag_count: 0, tags: Set.new, relations: {}
      )
    end

    # Consumes the next resource line and registers it as the current resource.
    # StopIteration is deliberately not rescued: it ends the loop in #index.
    def index_resource
      line, lnb = next_resource_line
      @current = line
      raise DataError, "resource #{@current} missing a `/' in line #{lnb.succ}" unless RESOURCE.match?(@current)
      raise DataError, "duplicate resource #{@current} in line #{lnb.succ}" if data.resources.member?(@current)

      data.resources << @current
    end

    # Returns the next [line, index] pair whose line is not blank.
    # Kernel#loop is avoided here because it would swallow StopIteration.
    def next_resource_line
      pair = stream.next
      pair = stream.next while EMPTY.match?(pair.first)
      pair
    end

    # Consumes the tags line that must follow every resource line.
    def index_tags
      tags, lnb = stream.next
      add_tags(tags, lnb)
    rescue StopIteration
      raise DataError, "resource #{@current} has no tags line at EOF"
    end
  end
end
80
+ # SPDX-License-Identifier: AGPL-3.0-or-later
@@ -0,0 +1,42 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'debug'
4
class IndexMe
  # Answers tag queries against previously indexed data.
  #
  # A query is a list of conjunctions. Each conjunction is a comma-separated
  # list of tags that must all be attached to a resource. The overall result is
  # the union of the resources matched by the individual conjunctions.
  class Query
    include Debug

    attr_reader :data, :relations

    # @param query [Enumerable<String>] conjunctions, e.g. ["ruby,cli", "docs"]
    # @return [Set] resources matching at least one conjunction; empty when
    #   nothing matches
    def run(query)
      query.reduce(Set.new) { |found, conjunction| found | resources_matching(conjunction) }
    end

    private

    # @param data [#relations] index data; `relations` maps a tag to an entry
    #   responding to `count` and `resources`
    # @param debug [Boolean] enables Debug#debug output
    def initialize(data, debug: false)
      @data = data
      @debug = debug
      @relations = data.relations
    end

    # Resources carrying every tag of +conjunction+.
    #
    # @return [Set] possibly empty, never nil
    def resources_matching(conjunction)
      tags = conjunction.split(",")
      entries = relations.values_at(*tags)
      # An empty conjunction, or one naming a tag that was never indexed, cannot
      # be satisfied by any resource.
      return Set.new if entries.empty? || entries.any?(&:nil?)

      # Intersect starting with the rarest tag so intermediate sets stay small.
      entries.sort_by(&:count).map(&:resources).reduce(:&)
    end
  end
end
42
+ # SPDX-License-Identifier: AGPL-3.0-or-later
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
class IndexMe
  # Version string of this library.
  VERSION = '0.0.0'
end
6
+ # SPDX-License-Identifier: AGPL-3.0-or-later
data/lib/index_me.rb ADDED
@@ -0,0 +1,141 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'index_me/debug'
4
+ require_relative 'index_me/indexer'
5
+ require_relative 'index_me/query'
6
# Command line front end: keeps a marshalled index next to a data file and
# answers tag queries from it.
class IndexMe
  # Raised when no query is given and index-only mode was not requested.
  UsageError = Class.new(RuntimeError)

  include Debug

  attr_reader :data

  # Prints the index (index-only mode) or the resources matching the query.
  #
  # @raise [UsageError] when no query was given and index-only mode is off
  def run
    if @only_index
      p data
    else
      raise UsageError, "missing query, provide a list of tags or use -i|--index-only" if @query.empty?
      ## FUTURE: Interactive behavior (interrupt with c-c)
      ## q = Query.new(data)
      ## loop do
      ##   query = gets(chomp: true)
      ##   puts q.run(query)
      ## end

      results = Query.new(data, debug: @debug).run(@query)
      puts(results.to_a)
    end
  end

  private

  # @param args [Array<String>] command line; leading options are removed from
  #   it in place and the remainder becomes the query
  # @raise [ArgumentError] on bad options or an unreadable data file
  def initialize(args)
    parse(args)
    check_file!
    @query = args
  end

  # Validates the data file, derives the index path and loads or builds the index.
  def check_file!
    @file ||= '.index_me.db'
    raise ArgumentError, "file #{@file.inspect} is not readable" unless File.readable?(@file)

    @index = derived_path(".idx")
    index_if_necessary
  end

  # Path of a companion file: a trailing ".db" is replaced by +extension+, and
  # for any other name the extension is appended. The result therefore never
  # equals the data file itself, so writing to it cannot overwrite the data.
  def derived_path(extension)
    "#{@file.delete_suffix('.db')}#{extension}"
  end

  # Writes one tag entry of the human-readable dump.
  def dump_tag(data:, fh:, tag:)
    fh.puts("tag: #{tag}")
    fh.puts("  count: #{data.count}")
    fh.puts("  res: #{data.resources.to_a.join(", ")}")
    fh.puts
  end

  # Loads the stored index when it is newer than the data file, rebuilds it otherwise.
  def index_if_necessary
    @data =
      if index_newer?
        debug("index up to date... skipping reindexing", always: @only_index)
        # NOTE(review): Marshal.load must only ever see index files written by
        # this tool; it is unsafe on data from an untrusted source.
        Marshal.load(File.binread(@index))
      else
        debug("reindexing #{@file} => #{@index}", always: @only_index)
        reindex
      end
    maybe_dump
  end

  # True when a readable index exists, is strictly newer than the data file,
  # and no rebuild was forced.
  def index_newer?
    return false if @force
    return false unless File.readable?(@index)

    idxtime = File.stat(@index).mtime
    dbtime = File.stat(@file).mtime

    debug(
      "db time: #{dbtime}",
      "index time: #{idxtime}",
    )
    idxtime > dbtime
  end

  # Writes the textual dump file when requested; always returns the index data.
  def maybe_dump
    return data unless @dump

    File.open(derived_path(".dump"), "w") do |fh|
      fh.puts("resource count: #{data.resource_count}")
      fh.puts(data.resources.inspect)
      data.relations.each { |tag, entry| dump_tag(data: entry, fh:, tag:) }
    end
    data
  end

  # Consumes leading options from +args+ and stops at the first argument that
  # does not start with a dash.
  def parse(args)
    while (arg = args.first)
      case arg
      when "--debug"
        @debug = true
      when "--dump"
        @dump = true
      when "--index-only"
        @only_index = true
      when "-F", "--force"
        @force = true
      when "-f", "--file"
        args.shift
        raise ArgumentError, "-f|--file needs argument" if args.empty?

        @file = args.first
      when /\A-(.*)/
        single_options(Regexp.last_match(1))
      else
        break
      end
      args.shift
    end
  end

  # Rebuilds the index from the data file and stores it in binary mode.
  def reindex
    @data = Indexer.new(@file, debug: @debug).index
    File.binwrite(@index, Marshal.dump(@data))
    @data
  end

  # Handles bundled one-letter flags such as "-dF".
  def single_options(str)
    str.grapheme_clusters.each do |flag|
      case flag
      when "d"
        @debug = true
      when "D"
        @dump = true
      when "F"
        @force = true
      when "i"
        @only_index = true
      else
        raise ArgumentError, "bad flag: #{flag}"
      end
    end
  end
end
141
+ # SPDX-License-Identifier: AGPL-3.0-or-later
metadata ADDED
@@ -0,0 +1,77 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: index_me
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Robert Dober
8
+ bindir: bin
9
+ cert_chain: []
10
+ date: 2025-06-19 00:00:00.000000000 Z
11
+ dependencies:
12
+ - !ruby/object:Gem::Dependency
13
+ name: clipboard
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - "~>"
17
+ - !ruby/object:Gem::Version
18
+ version: 2.0.0
19
+ type: :runtime
20
+ prerelease: false
21
+ version_requirements: !ruby/object:Gem::Requirement
22
+ requirements:
23
+ - - "~>"
24
+ - !ruby/object:Gem::Version
25
+ version: 2.0.0
26
+ - !ruby/object:Gem::Dependency
27
+ name: lab42_base
28
+ requirement: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - "~>"
31
+ - !ruby/object:Gem::Version
32
+ version: 0.0.2
33
+ type: :runtime
34
+ prerelease: false
35
+ version_requirements: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - "~>"
38
+ - !ruby/object:Gem::Version
39
+ version: 0.0.2
40
+ description: Create an index file based on a data file and then use it to search the
41
+ indexed resources
42
+ email: robert.dober@gmail.com
43
+ executables:
44
+ - index_me
45
+ extensions: []
46
+ extra_rdoc_files: []
47
+ files:
48
+ - bin/index_me
49
+ - bin/test1.rb
50
+ - lib/index_me.rb
51
+ - lib/index_me/debug.rb
52
+ - lib/index_me/indexer.rb
53
+ - lib/index_me/query.rb
54
+ - lib/index_me/version.rb
55
+ homepage: https://codeberg.org/lab419/speculate_about
56
+ licenses:
57
+ - AGPL-3.0-or-later
58
+ metadata: {}
59
+ rdoc_options: []
60
+ require_paths:
61
+ - lib
62
+ required_ruby_version: !ruby/object:Gem::Requirement
63
+ requirements:
64
+ - - ">="
65
+ - !ruby/object:Gem::Version
66
+ version: 3.4.1
67
+ required_rubygems_version: !ruby/object:Gem::Requirement
68
+ requirements:
69
+ - - ">="
70
+ - !ruby/object:Gem::Version
71
+ version: '0'
72
+ requirements: []
73
+ rubygems_version: 3.6.9
74
+ specification_version: 4
75
+ summary: Create an index file based on a data file and then use it to search the
76
+ indexed resources
77
+ test_files: []