wordgraph 0.1.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5eec332d565ac8fcf1192d04b99b9d6d313b68a2291dd44ce7532dfeccb42141
4
- data.tar.gz: 2d2f2efa8e4d5747ed81ec6c1fa555df1dcf95e5970102b27c464340ddc50165
3
+ metadata.gz: 45813d9103ca02b5238dbfe12b4aa656da441dcbe66be0fac9e8e46f81b9744a
4
+ data.tar.gz: 21516d98f4aea705561d34c8a23835dc23df1d575369a0bc4a7cd225a863a7b3
5
5
  SHA512:
6
- metadata.gz: d3009ff268b5ca9a58f9b98d82541834b95f885662e8a828c1c4a289109157bd452cbda69edcc3202fc6835607772e6ca0dfaec53cbeb628e567dea01fdc8a64
7
- data.tar.gz: 1e71d6ec4dea81d0cfbd1e1738d25df072810a597408a10ebbc4bfa65fd39121a946229c8fdfe935deeae3d63a8682ae303f624c18d95d65b919cf06ab40113e
6
+ metadata.gz: 2170ee77edca8125943b3dac61aee6d0fed2a753b7d3456a7acadc5dd4cd61711663b8fc7c833f329eac5d3db8e9d1f83b2973f3e62bb8a29ad5a011e30df29b
7
+ data.tar.gz: b69f1a188c603e887e145720fd123db9397cb4bca03b6dbb2104b947c5ef351b175f6efd270cf8a7658b22fce68ace1f41194a5069f8079000269e529a23bdf9
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ lib/*.rb
2
+ lib/**/*.rb
3
+ -
4
+ CHANGELOG.rdoc
5
+ LICENSE.md
data/bin/console ADDED
@@ -0,0 +1,11 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require "bundler/setup"
5
+ require "wordgraph"
6
+
7
+ # You can add fixtures and/or initialization code here to make experimenting
8
+ # with your gem easier. You can also use a different console, if you like.
9
+
10
+ require "irb"
11
+ IRB.start(__FILE__)
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
data/bin/wordgraph ADDED
@@ -0,0 +1,7 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require "wordgraph/cli"
5
+
6
+ cli = Wordgraph::CLI.new
7
+ options = cli.parse(ARGV)
data/lib/wordgraph/cli.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "optparse"
4
+ require_relative "core"
4
5
 
5
6
  module Wordgraph
6
7
  class CLI
@@ -10,27 +11,57 @@ module Wordgraph
10
11
 
11
12
  def parse(args)
12
13
  parser = OptionParser.new do |opts|
13
- opts.banner = "Usage: cli.rb [options] ARG..."
14
+ opts.banner = "E.g.: wordgraph [options] ARG..."
14
15
  opts.separator ""
15
16
  opts.separator "Specific options:"
16
17
 
18
+ opts.on("-h", "--help", "Prints this help") do
19
+ puts opts
20
+ puts args
21
+ end
22
+
17
23
  opts.on("-v", "--[no-]verbose", "Run verbosely") do |v|
18
24
  @options[:verbose] = v
19
25
  end
20
26
 
21
- opts.on("-h", "--help", "Prints this help") do
22
- puts opts
23
- puts args
24
- exit
27
+ opts.on("-o=STRING", "--output-directory=STRING", "Directory to output files", String) do |dir|
28
+ # Supports /mnt/[:drive_letter]/[:directory]
29
+ @options[:output_directory] = dir
30
+ end
31
+
32
+ opts.on("-n=STRING", "--name=STRING", "Output file name", String) do |name|
33
+ @options[:name] = name
34
+ end
35
+
36
+ opts.on("--[no-]overwrite", "Overwrite output file") do |overwrite|
37
+ @options[:overwrite] = overwrite
38
+ end
39
+
40
+ opts.on("-s=INTEGER", "--seed=INTEGER", "Deterministic word order", Integer) do |seed|
41
+ @options[:seed] = seed
42
+ end
43
+
44
+ opts.on("-m=INTEGER", "--nlargest=INTEGER", "Maximum amount of words in graph", Integer) do |nlargest|
45
+ @options[:nlargest] = nlargest
46
+ end
47
+
48
+ opts.on("--font=STRING", ["times", "georgia", "garamond", "arial",
49
+ "helvetica", "verdana", "courier", "cursive", "papyrus"],
50
+ "Use a web safe font", String) do |font|
51
+ @options[:font] = font
25
52
  end
26
53
  end
27
54
 
28
55
  begin
29
- parser.parse!(args)
56
+ files = parser.parse!(args)
57
+ core = Core.new(files, **@options)
58
+ core.process
30
59
  rescue OptionParser::InvalidOption => e
31
60
  puts e.message
32
61
  puts parser
33
62
  exit 1
63
+ rescue ArgumentError => e
64
+ puts "Invalid argument: #{e}"
34
65
  end
35
66
 
36
67
  @options
@@ -0,0 +1,182 @@
1
+ require_relative "mathwg"
2
+
3
+ module Wordgraph
4
+ class Core
5
+ def initialize(files, verbose: false, output_directory: Dir.pwd, name: "wordgraph",
6
+ overwrite: false, seed: nil, nlargest: nil, font: "times")
7
+ @files = files
8
+ @verbose = verbose
9
+ @output_directory = output_directory
10
+ @name = name
11
+ @overwrite = overwrite
12
+ @seed = seed
13
+ @nlargest = nlargest
14
+ @max_size = 20
15
+ @min_font_size = 12;
16
+ @max_font_size = 48;
17
+ @font = font.downcase
18
+ if @verbose
19
+ puts "Proceeding with settings:"
20
+ self.instance_variables.each do |var|
21
+ puts "#{var}: #{self.instance_variable_get(var) || false}"
22
+ end
23
+ end
24
+ end
25
+
26
+ def tokenize(word)
27
+ # Lowercase
28
+ word = word.downcase
29
+ # Strip trailing punctuation at word start
30
+ word = word.sub(/^[\.,;:!?'"`(\[\{<]+/, "")
31
+ # Strip trailing punctuation at word end
32
+ word = word.sub(/[\.,;:!?'"`)\]\}>]+$/, "")
33
+ return word
34
+ end
35
+
36
+ def generate_cloud(tokens)
37
+ # https://en.wikipedia.org/wiki/Tag_cloud
38
+ raise ArgumentError, "Empty tokens map" unless tokens.length > 0
39
+ # Linear normalization
40
+ # TODO: logarithmic function for larger texts
41
+ min_count = tokens.values.min
42
+ max_count = tokens.values.max
43
+ max_sub_min = [max_count - min_count, 1].max
44
+ tokens = tokens.max_by(@nlargest) { |v| v }.to_h if @nlargest
45
+ tokens.each do |token, count|
46
+ size = count <= min_count ?
47
+ 1 :
48
+ ((@max_size * (count - min_count)) / max_sub_min).ceil
49
+ tokens[token] = {
50
+ count: count,
51
+ size: size
52
+ }
53
+ end
54
+ self.write_html(tokens)
55
+ end
56
+
57
+ def get_path
58
+ raise ArgumentError, "Output directory: #{@output_directory} not found" if !Dir.exist?(@output_directory)
59
+ file_name = File.basename(@name) + ".html"
60
+ out = File.join(@output_directory, file_name)
61
+ puts "Creating file: #{out}"
62
+ stop_exists = File.exist?(out) && !@overwrite
63
+ raise ArgumentError, "Output file already exists, either overwrite or change the name" if stop_exists
64
+ return out
65
+ end
66
+
67
+ def remap(from_min, from_max, to_min, to_max, value)
68
+ def lerp(a, b, t)
69
+ return (1 - t) * a + b * t
70
+ end
71
+ def invLerp(a, b, v)
72
+ return a === b ? 0 : (v - a).to_f / (b - a)
73
+ end
74
+ return lerp(to_min, to_max, invLerp(from_min, from_max, value))
75
+ end
76
+
77
+ def write_html(tokens)
78
+ out = self.get_path
79
+ File.open(out, File::RDWR | File::CREAT) do |f|
80
+ f.flock(File::LOCK_EX)
81
+ f.truncate(0)
82
+ document_setup = <<~HTML
83
+ <!DOCTYPE html>
84
+ <html lang="en">
85
+ <head>
86
+ <meta charset="UTF-8">
87
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
88
+ <title>wordgraph</title>
89
+ </head>
90
+ <body>
91
+ #{tokens.to_a.shuffle(random: Random.new(*@seed)).map { |k, v|
92
+ fs = remap(1, @max_size, @min_font_size, @max_font_size, v[:size]).floor
93
+ title = "#{v[:count].to_s} occurrence #{(v[:count] > 1 ) ? "s" : ""}"
94
+ <<~HTML.strip
95
+ <span
96
+ title='#{title}'
97
+ style='font-size: #{fs}px;'
98
+ aria-role='listitem'
99
+ aria-label='#{title}'>#{k}</span>
100
+ HTML
101
+ }.join("\n\s\s")}
102
+ </body>
103
+ <style>
104
+ body {
105
+ display: flex;
106
+ flex-wrap: wrap;
107
+ gap: 8px;
108
+ justify-content: center;
109
+ background-color: #000;
110
+ color: #FFFFFF;
111
+ line-height: 1.6;
112
+ margin: 0;
113
+ padding: 20px;
114
+ font-family: #{@font};
115
+ }
116
+ span {
117
+ display: inline-block;
118
+ margin: 2px 6px;
119
+ padding: 4px 12px;
120
+ border-radius: 4px;
121
+ }
122
+ </style>
123
+ </html>
124
+ HTML
125
+ f.write(document_setup)
126
+ end
127
+ puts File.read(out) if @verbose
128
+ end
129
+
130
+ def process_lines(lines)
131
+ count = {}
132
+ count.default = 0
133
+ lines.each do |line|
134
+ line.split do |word|
135
+ tokenized = self.tokenize(word)
136
+ count[tokenized] += 1
137
+ end
138
+ end
139
+ begin
140
+ puts count if @verbose
141
+ self.generate_cloud(count)
142
+ rescue ArgumentError => e
143
+ raise e
144
+ else
145
+ puts "Succesfully finished processing" if @verbose
146
+ end
147
+ return count
148
+ end
149
+
150
+ def process_text(file)
151
+ puts "Processing txt file #{file}" if @verbose
152
+ lines = IO.readlines(file)
153
+ return self.process_lines(lines)
154
+ end
155
+
156
+ def process_docx(file)
157
+ require "zip"
158
+ puts "Processing docx file #{file}" if @verbose
159
+ binary = File.open(file, 'rb') { |f| f.read }
160
+ Zip::File.open_buffer(binary) do |zip|
161
+ doc = zip.find { |entry| entry.name == 'word/document.xml'}
162
+ text = doc.get_input_stream.read
163
+ # TODO: check if styling is supported; the scan below only matches <w:t> tags without attributes
164
+ lines = text.scan(/(?<=<w:t>).+?(?=<\/w:t>)/)
165
+ return self.process_lines(lines)
166
+ end
167
+ end
168
+
169
+ def process
170
+ Array(@files).each do |f|
171
+ case f
172
+ when /\.(txt|text)\z/i
173
+ return self.process_text(f)
174
+ when /\.docx\z/i
175
+ return self.process_docx(f)
176
+ else
177
+ raise ArgumentError, "File type not supported."
178
+ end
179
+ end
180
+ end
181
+ end
182
+ end
@@ -0,0 +1,15 @@
1
+ module Wordgraph
2
+ module Mathwg
3
+ def lerp(a, b, t)
4
+ (1 - t) * a + b * t
5
+ end
6
+
7
+ def invLerp(a, b, v)
8
+ a === b ? 0 : (v - a).to_f / (b - a)
9
+ end
10
+
11
+ def remap(from_min, from_max, to_min, to_max, value)
12
+ lerp(to_min, to_max, invLerp(from_min, from_max, value))
13
+ end
14
+ end
15
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Wordgraph
4
- VERSION = "0.1.0"
4
+ VERSION = "0.6.0"
5
5
  end
data/lib/wordgraph.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative "wordgraph/version"
4
+ require_relative "wordgraph/cli"
4
5
 
5
6
  module Wordgraph
6
7
  class Error < StandardError; end
data/wordgraph.gemspec ADDED
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+ # coding: utf-8
3
+
4
+ lib = File.expand_path("../lib/", __FILE__)
5
+ $LOAD_PATH.unshift lib unless $LOAD_PATH.include?(lib)
6
+ require "wordgraph/version"
7
+
8
+ Gem::Specification.new do |spec|
9
+ spec.name = "wordgraph"
10
+ spec.version = Wordgraph::VERSION
11
+ spec.licenses = %w(MIT)
12
+ spec.authors = ["marm00"]
13
+ spec.email = [""]
14
+ spec.description = "Wordgraph"
15
+ spec.summary = spec.description
16
+ spec.homepage = "https://github.com/marm00/wordgraph"
17
+
18
+ spec.metadata = {
19
+ "bug_tracker_uri" => "https://github.com/marm00/wordgraph/issues",
20
+ "changelog_uri" => spec.homepage,
21
+ "documentation_uri" => spec.homepage,
22
+ "source_code_uri" => "https://github.com/marm00/wordgraph/tree/main",
23
+ "wiki_uri" => spec.homepage,
24
+ }
25
+
26
+ spec.required_ruby_version = ">= 3.1.0"
27
+
28
+ spec.files = %w(.document wordgraph.gemspec) + Dir["*.md", "bin/*", "lib/**/*.rb", "fixtures/**/*"]
29
+ spec.executables = %w(wordgraph)
30
+ spec.require_paths = %w(lib)
31
+
32
+ spec.add_dependency "optparse", "~> 0.6.0"
33
+ end
metadata CHANGED
@@ -1,32 +1,56 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wordgraph
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - marm00
8
- bindir: exe
8
+ bindir: bin
9
9
  cert_chain: []
10
- date: 2025-01-14 00:00:00.000000000 Z
11
- dependencies: []
12
- executables: []
10
+ date: 2025-01-24 00:00:00.000000000 Z
11
+ dependencies:
12
+ - !ruby/object:Gem::Dependency
13
+ name: optparse
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - "~>"
17
+ - !ruby/object:Gem::Version
18
+ version: 0.6.0
19
+ type: :runtime
20
+ prerelease: false
21
+ version_requirements: !ruby/object:Gem::Requirement
22
+ requirements:
23
+ - - "~>"
24
+ - !ruby/object:Gem::Version
25
+ version: 0.6.0
26
+ description: Wordgraph
27
+ email:
28
+ - ''
29
+ executables:
30
+ - wordgraph
13
31
  extensions: []
14
32
  extra_rdoc_files: []
15
33
  files:
16
- - ".rspec"
17
- - ".ruby-version"
18
- - LICENSE
34
+ - ".document"
19
35
  - README.md
20
- - Rakefile
36
+ - bin/console
37
+ - bin/setup
38
+ - bin/wordgraph
21
39
  - lib/wordgraph.rb
22
40
  - lib/wordgraph/cli.rb
41
+ - lib/wordgraph/core.rb
42
+ - lib/wordgraph/mathwg.rb
23
43
  - lib/wordgraph/version.rb
24
- - sig/wordgraph.rbs
44
+ - wordgraph.gemspec
25
45
  homepage: https://github.com/marm00/wordgraph
26
- licenses: []
46
+ licenses:
47
+ - MIT
27
48
  metadata:
28
- homepage_uri: https://github.com/marm00/wordgraph
29
- source_code_uri: https://github.com/marm00/wordgraph
49
+ bug_tracker_uri: https://github.com/marm00/wordgraph/issues
50
+ changelog_uri: https://github.com/marm00/wordgraph
51
+ documentation_uri: https://github.com/marm00/wordgraph
52
+ source_code_uri: https://github.com/marm00/wordgraph/tree/main
53
+ wiki_uri: https://github.com/marm00/wordgraph
30
54
  rdoc_options: []
31
55
  require_paths:
32
56
  - lib
@@ -43,5 +67,5 @@ required_rubygems_version: !ruby/object:Gem::Requirement
43
67
  requirements: []
44
68
  rubygems_version: 3.6.2
45
69
  specification_version: 4
46
- summary: Graphs from words.
70
+ summary: Wordgraph
47
71
  test_files: []
data/.rspec DELETED
@@ -1,3 +0,0 @@
1
- --format documentation
2
- --color
3
- --require spec_helper
data/.ruby-version DELETED
@@ -1 +0,0 @@
1
- 3.4.1
data/LICENSE DELETED
@@ -1,21 +0,0 @@
1
- MIT License
2
-
3
- Copyright (c) 2025 marm00
4
-
5
- Permission is hereby granted, free of charge, to any person obtaining a copy
6
- of this software and associated documentation files (the "Software"), to deal
7
- in the Software without restriction, including without limitation the rights
8
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
- copies of the Software, and to permit persons to whom the Software is
10
- furnished to do so, subject to the following conditions:
11
-
12
- The above copyright notice and this permission notice shall be included in all
13
- copies or substantial portions of the Software.
14
-
15
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
- SOFTWARE.
data/Rakefile DELETED
@@ -1,8 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "bundler/gem_tasks"
4
- require "rspec/core/rake_task"
5
-
6
- RSpec::Core::RakeTask.new(:spec)
7
-
8
- task default: :spec
data/sig/wordgraph.rbs DELETED
@@ -1,4 +0,0 @@
1
- module Wordgraph
2
- VERSION: String
3
- # See the writing guide of rbs: https://github.com/ruby/rbs#guides
4
- end