index_me 0.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/bin/index_me +7 -0
- data/bin/test1.rb +5 -0
- data/lib/index_me/debug.rb +15 -0
- data/lib/index_me/indexer.rb +80 -0
- data/lib/index_me/query.rb +42 -0
- data/lib/index_me/version.rb +6 -0
- data/lib/index_me.rb +141 -0
- metadata +77 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: ff3174a890bb2908760bab10bdff5ffa54c640899e8af4daa58510245f8870f2
|
|
4
|
+
data.tar.gz: 3669ac2ae335d89a9c5a638b5f1f67c863a84728187c66046a382d108bfc6af6
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: '02956ed5ef0d04613999b64c2e12f5afa64da39de58cca4132bc1082eb0f4dd35223c7d8bfa1ec1fcee476fa972dd6123e0721bf814eec58c22c02bf4fc66b06'
|
|
7
|
+
data.tar.gz: '08c1310f6be862402bf93e2e68e47d63eac8b311e43815060f1dcc1d4a19f36a444b756bc9b100e18270d36ace15734bcd78e4afeda231aeb76459d3f3eff7c4'
|
data/bin/index_me
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
class IndexMe
  # Mixin providing conditional diagnostics on standard error.
  #
  # The including object switches output on through its `@debug` instance
  # variable; individual calls can force output with `always: true`.
  module Debug
    # Prints +lines+ (nested arrays are flattened) to $stderr and then runs
    # the optional block, but only when debugging is on or +always+ is truthy.
    #
    # Returns the block's value when output happened and a block was given,
    # otherwise nil.
    def debug(*lines, always: false, &blk)
      if always || @debug
        $stderr.puts(*lines.flatten)
        blk&.call
      end
    end
  end
end
|
|
15
|
+
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'ostruct'
|
|
4
|
+
require_relative 'debug'
|
|
5
|
+
|
|
6
|
+
class IndexMe
  # Raised when the data file does not follow the expected layout.
  DataError = Class.new(RuntimeError)

  # Builds the tag index from a data file.
  #
  # The file is consumed as consecutive pairs of lines: a resource line,
  # which must contain a `/`, followed by a line of comma separated tags.
  # Every line is stripped of surrounding whitespace before it is used.
  class Indexer
    include Debug

    # `data` is not listed here: the private, memoizing #data defined below
    # would replace a generated reader anyway.
    attr_reader :current, :file, :stream

    EMPTY = %r{\A \s* \z}x
    RESOURCE = %r{ \/ }x
    TAG = %r{\A [[:lower:]][-[:lower:]]* \z}x

    # Reads the whole stream and returns the populated index structure.
    #
    # Returns an OpenStruct with `resources` and `tags` (Sets), their sizes in
    # `resource_count` and `tag_count`, and `relations`, a Hash mapping each
    # tag to an OpenStruct holding `count` and the Set of tagged `resources`.
    #
    # Raises DataError on malformed input.
    def index
      # Kernel#loop ends silently on the StopIteration that index_resource
      # lets through once the stream is exhausted.
      loop do
        index_resource
        index_tags
      end

      data.resource_count = data.resources.size
      data.tag_count = data.tags.size
      data
    end

    private

    # file  - path of the data file to read
    # debug - enables Debug#debug output when truthy
    def initialize(file, debug:)
      @debug = debug
      @file = file
      # Lazy stream of [stripped_line, zero_based_index] pairs.
      @stream = IO.foreach(file, chomp: true).lazy.map(&:strip).with_index
    end

    # Splits a tags line on commas and registers every tag for the current
    # resource. +lnb+ is the zero based line index; messages report lnb + 1.
    def add_tags(tags, lnb)
      tags = tags.strip.split(",")
      raise DataError, "no tags defined for resource #{@current} in line #{lnb.succ}" if tags.empty?

      tags.each { add_tag(it, lnb) }
    end

    # Validates the format of a single tag before registering it.
    def add_tag(tag, lnb)
      unless TAG.match?(tag)
        # Report the one based line number, like every other DataError here.
        raise DataError,
              "illegal tag format in line #{lnb.succ} tag: #{tag.inspect}, need only lower case and dash"
      end

      do_add_tag(tag, lnb)
    end

    # Records the tag globally and links it to the current resource; a tag
    # may be given only once per resource.
    def do_add_tag(tag, lnb)
      data.tags << tag
      entry = (data.relations[tag] ||= OpenStruct.new(count: 0, resources: Set.new))
      if entry.resources.member?(@current)
        raise DataError,
              "multiple specification of tag #{tag.inspect} for resource #{@current.inspect} in line #{lnb.succ}"
      end

      entry.resources << @current
      entry.count += 1
    end

    # Lazily created accumulator for everything the indexer collects.
    def data
      @__data__ ||= OpenStruct.new(
        resource_count: 0, resources: Set.new, tag_count: 0, tags: Set.new, relations: {}
      )
    end

    # Consumes the next line as a resource name and registers it.
    # StopIteration is intentionally not rescued: it terminates #index.
    def index_resource
      stream.next => [line, lnb]
      @current = line
      raise DataError, "resource #{@current} missing a `/' in line #{lnb.succ}" unless RESOURCE.match?(@current)
      raise DataError, "duplicate resource #{@current} in line #{lnb.succ}" if data.resources.member?(@current)

      data.resources << @current
    end

    # Consumes the tags line belonging to the current resource; reaching the
    # end of the stream here means the resource has no tags line.
    def index_tags
      stream.next => [tags, lnb]
      add_tags(tags, lnb)
    rescue StopIteration
      raise DataError, "resource #{@current} has no tags line at EOF"
    end
  end
end
|
|
80
|
+
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'debug'
|
|
4
|
+
class IndexMe
  # Evaluates tag queries against an index structure that exposes
  # `relations`: a Hash mapping each tag to an object responding to `count`
  # and `resources` (a Set of resource names).
  class Query
    include Debug

    attr_reader :data, :relations

    # Runs a query given as an enumerable of conjunction strings.
    #
    # Each conjunction is a comma separated list of tags that must all be
    # attached to a resource; the results of all conjunctions are united.
    #
    # Returns a Set of resource names, empty when nothing matches.
    def run(query)
      query.inject(Set.new) { |found, conjunction| found + query(conjunction) }
    end

    private

    # data  - the index structure, see the class comment
    # debug - enables Debug#debug output when truthy
    def initialize(data, debug: false)
      @data = data
      @debug = debug
      @relations = data.relations
    end

    # Returns the Set of resources carrying every tag of +conjunction+.
    #
    # A tag that is not in the index cannot be carried by any resource, so
    # the whole conjunction is empty; the same holds for a conjunction that
    # names no tag at all.
    def query(conjunction)
      entries = conjunction.split(",").uniq.map { |tag| relations[tag] }
      return Set.new if entries.empty? || entries.any?(&:nil?)

      # Start with the rarest tag so the intermediate sets stay small.
      entries
        .sort_by(&:count)
        .map(&:resources)
        .reduce { |common, resources| common.intersection(resources) }
    end
  end
end
|
|
42
|
+
# SPDX-License-Identifier: AGPL-3.0-or-later
|
data/lib/index_me.rb
ADDED
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'index_me/debug'
|
|
4
|
+
require_relative 'index_me/indexer'
|
|
5
|
+
require_relative 'index_me/query'
|
|
6
|
+
# Command line front end: parses the options, (re)builds the index of the
# data file when needed, and either prints the index or runs a tag query.
class IndexMe
  UsageError = Class.new(RuntimeError)

  include Debug

  attr_reader :data

  # Prints the index structure (index only mode) or the resources matching
  # the query, one per line.
  #
  # Raises UsageError when a query is required but none was given.
  def run
    if @only_index
      p data
    else
      raise UsageError, "missing query, provide a list of tags or use -i|--index-only" if @query.empty?
      ## FUTURE: Interactive behavior (interrupt with c-c)
      ## q = Query.new(data)
      ## loop do
      ##   query = gets(chomp: true)
      ##   puts q.run(query)
      ## end

      results = Query.new(data, debug: @debug).run(@query)
      puts(results.to_a)
    end
  end

  private

  # args - command line arguments; leading options are consumed in place and
  #        whatever remains is kept as the query.
  def initialize(args)
    parse(args)
    check_file!
    @query = args
  end

  # Resolves the data file (default '.index_me.db'), derives the index path
  # and loads or rebuilds the index.
  #
  # Raises ArgumentError when the data file is not readable.
  def check_file!
    @file ||= '.index_me.db'
    raise ArgumentError, "file #{@file.inspect} is not readable" unless File.readable?(@file)

    @index = derived_path(".idx")

    index_if_necessary
  end

  # Path of a companion file next to the data file: a trailing `.db` is
  # replaced by +extension+, any other name gets +extension+ appended so the
  # result can never be the data file itself.
  def derived_path(extension)
    "#{@file.delete_suffix('.db')}#{extension}"
  end

  # Writes the human readable dump entry of one tag to +fh+.
  def dump_tag(data:, fh:, tag:)
    fh.puts("tag: #{tag}")
    fh.puts("  count: #{data.count}")
    fh.puts("  res: #{data.resources.to_a.join(", ")}")
    fh.puts
  end

  # Loads the stored index when it is current, rebuilds it otherwise, then
  # writes the optional dump. Returns the index structure.
  def index_if_necessary
    @data =
      if index_newer?
        debug("index up to date... skipping reindexing", always: @only_index)
        # NOTE(review): Marshal.load can instantiate arbitrary objects; the
        # index file must only ever come from a trusted location.
        Marshal.load(File.binread(@index))
      else
        debug("reindexing #{@file} => #{@index}", always: @only_index)
        reindex
      end
    maybe_dump
  end

  # True when a readable index exists that is strictly newer than the data
  # file and reindexing was not forced.
  def index_newer?
    return false if @force
    return false unless File.readable?(@index)

    idxtime = File.mtime(@index)
    dbtime = File.mtime(@file)

    debug(
      "db time: #{dbtime}",
      "index time: #{idxtime}",
    )
    idxtime > dbtime
  end

  # Writes a textual dump next to the data file when dumping was requested.
  # Always returns the index structure.
  def maybe_dump
    return data unless @dump

    File.open(derived_path(".dump"), "w") do |fh|
      fh.puts("resource count: #{data.resource_count}")
      fh.puts(data.resources.inspect)
      data.relations.each { |tag, entry| dump_tag(data: entry, fh:, tag:) }
    end
    data
  end

  # Consumes the leading options of +args+ in place and stops at the first
  # argument that does not start with a dash.
  #
  # Raises ArgumentError for a missing --file value or an unknown flag.
  def parse(args)
    loop do
      case args.first
      when "--debug"
        @debug = true
        args.shift
      when "--dump"
        @dump = true
        args.shift
      when "--index-only"
        @only_index = true
        args.shift
      when "-F", "--force"
        @force = true
        args.shift
      when "-f", "--file"
        args.shift
        raise ArgumentError, "-f|--file needs argument" if args.empty?

        @file = args.shift
      when /\A-(.*)/
        # Any other dash argument is a bundle of single letter flags.
        single_options(Regexp.last_match(1))
        args.shift
      else
        break
      end
    end
  end

  # Rebuilds the index from the data file and stores it in the index file.
  def reindex
    @data = Indexer.new(@file, debug: @debug).index
    # Marshal output is binary data, so bypass any text mode conversion.
    File.binwrite(@index, Marshal.dump(@data))
    @data
  end

  # Applies every single letter flag contained in +str+.
  #
  # Raises ArgumentError on an unknown flag.
  def single_options(str)
    str.grapheme_clusters.each do |flag|
      case flag
      when "d"
        @debug = true
      when "D"
        @dump = true
      when "F"
        @force = true
      when "i"
        @only_index = true
      else
        raise ArgumentError, "bad flag: #{flag}"
      end
    end
  end
end
|
|
141
|
+
# SPDX-License-Identifier: AGPL-3.0-or-later
|
metadata
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: index_me
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.0.0
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Robert Dober
|
|
8
|
+
bindir: bin
|
|
9
|
+
cert_chain: []
|
|
10
|
+
date: 2025-06-19 00:00:00.000000000 Z
|
|
11
|
+
dependencies:
|
|
12
|
+
- !ruby/object:Gem::Dependency
|
|
13
|
+
name: clipboard
|
|
14
|
+
requirement: !ruby/object:Gem::Requirement
|
|
15
|
+
requirements:
|
|
16
|
+
- - "~>"
|
|
17
|
+
- !ruby/object:Gem::Version
|
|
18
|
+
version: 2.0.0
|
|
19
|
+
type: :runtime
|
|
20
|
+
prerelease: false
|
|
21
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
22
|
+
requirements:
|
|
23
|
+
- - "~>"
|
|
24
|
+
- !ruby/object:Gem::Version
|
|
25
|
+
version: 2.0.0
|
|
26
|
+
- !ruby/object:Gem::Dependency
|
|
27
|
+
name: lab42_base
|
|
28
|
+
requirement: !ruby/object:Gem::Requirement
|
|
29
|
+
requirements:
|
|
30
|
+
- - "~>"
|
|
31
|
+
- !ruby/object:Gem::Version
|
|
32
|
+
version: 0.0.2
|
|
33
|
+
type: :runtime
|
|
34
|
+
prerelease: false
|
|
35
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
36
|
+
requirements:
|
|
37
|
+
- - "~>"
|
|
38
|
+
- !ruby/object:Gem::Version
|
|
39
|
+
version: 0.0.2
|
|
40
|
+
description: Create an index file based on a data file and then use searching indexed
|
|
41
|
+
resources
|
|
42
|
+
email: robert.dober@gmail.com
|
|
43
|
+
executables:
|
|
44
|
+
- index_me
|
|
45
|
+
extensions: []
|
|
46
|
+
extra_rdoc_files: []
|
|
47
|
+
files:
|
|
48
|
+
- bin/index_me
|
|
49
|
+
- bin/test1.rb
|
|
50
|
+
- lib/index_me.rb
|
|
51
|
+
- lib/index_me/debug.rb
|
|
52
|
+
- lib/index_me/indexer.rb
|
|
53
|
+
- lib/index_me/query.rb
|
|
54
|
+
- lib/index_me/version.rb
|
|
55
|
+
homepage: https://codeberg.org/lab419/speculate_about
|
|
56
|
+
licenses:
|
|
57
|
+
- AGPL-3.0-or-later
|
|
58
|
+
metadata: {}
|
|
59
|
+
rdoc_options: []
|
|
60
|
+
require_paths:
|
|
61
|
+
- lib
|
|
62
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
63
|
+
requirements:
|
|
64
|
+
- - ">="
|
|
65
|
+
- !ruby/object:Gem::Version
|
|
66
|
+
version: 3.4.1
|
|
67
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
68
|
+
requirements:
|
|
69
|
+
- - ">="
|
|
70
|
+
- !ruby/object:Gem::Version
|
|
71
|
+
version: '0'
|
|
72
|
+
requirements: []
|
|
73
|
+
rubygems_version: 3.6.9
|
|
74
|
+
specification_version: 4
|
|
75
|
+
summary: Create an index file based on a data file and then use searching indexed
|
|
76
|
+
resources
|
|
77
|
+
test_files: []
|