ruby_wordcram 1.0.1 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +3 -0
  3. data/.mvn/extensions.xml +8 -0
  4. data/.mvn/wrapper/maven-wrapper.properties +1 -0
  5. data/Rakefile +28 -5
  6. data/docs/_posts/2017-03-07-getting_started.md +3 -2
  7. data/docs/_posts/2017-03-07-under_the_hood.md +33 -0
  8. data/lib/WordCram.jar +0 -0
  9. data/lib/jsoup-1.10.2.jar +0 -0
  10. data/lib/ruby_wordcram/version.rb +1 -1
  11. data/lib/ruby_wordcram.rb +1 -2
  12. data/pom.rb +53 -0
  13. data/pom.xml +87 -0
  14. data/ruby_wordcram.gemspec +1 -2
  15. data/src/cue/lang/Counter.java +141 -0
  16. data/src/cue/lang/IterableText.java +10 -0
  17. data/src/cue/lang/NGramIterator.java +151 -0
  18. data/src/cue/lang/SentenceIterator.java +86 -0
  19. data/src/cue/lang/WordIterator.java +60 -0
  20. data/src/cue/lang/stop/StopWords.java +114 -0
  21. data/src/cue/lang/stop/arabic +351 -0
  22. data/src/cue/lang/stop/armenian +45 -0
  23. data/src/cue/lang/stop/catalan +219 -0
  24. data/src/cue/lang/stop/croatian +2024 -0
  25. data/src/cue/lang/stop/czech +256 -0
  26. data/src/cue/lang/stop/danish +94 -0
  27. data/src/cue/lang/stop/dutch +107 -0
  28. data/src/cue/lang/stop/english +183 -0
  29. data/src/cue/lang/stop/esperanto +180 -0
  30. data/src/cue/lang/stop/farsi +966 -0
  31. data/src/cue/lang/stop/finnish +235 -0
  32. data/src/cue/lang/stop/french +543 -0
  33. data/src/cue/lang/stop/german +231 -0
  34. data/src/cue/lang/stop/greek +637 -0
  35. data/src/cue/lang/stop/hebrew +220 -0
  36. data/src/cue/lang/stop/hindi +97 -0
  37. data/src/cue/lang/stop/hungarian +202 -0
  38. data/src/cue/lang/stop/italian +279 -0
  39. data/src/cue/lang/stop/latin +1 -0
  40. data/src/cue/lang/stop/norwegian +176 -0
  41. data/src/cue/lang/stop/polish +138 -0
  42. data/src/cue/lang/stop/portuguese +204 -0
  43. data/src/cue/lang/stop/romanian +284 -0
  44. data/src/cue/lang/stop/russian +652 -0
  45. data/src/cue/lang/stop/slovak +110 -0
  46. data/src/cue/lang/stop/slovenian +448 -0
  47. data/src/cue/lang/stop/spanish +308 -0
  48. data/src/cue/lang/stop/swedish +114 -0
  49. data/src/cue/lang/stop/turkish +117 -0
  50. data/src/cue/lang/unicode/BlockUtil.java +103 -0
  51. data/src/cue/lang/unicode/Normalizer.java +55 -0
  52. data/src/cue/lang/unicode/Normalizer6.java +32 -0
  53. data/src/license.txt +201 -0
  54. data/src/wordcram/Anglers.java +137 -0
  55. data/src/wordcram/BBTree.java +133 -0
  56. data/src/wordcram/BBTreeBuilder.java +61 -0
  57. data/src/wordcram/Colorers.java +52 -0
  58. data/src/wordcram/EngineWord.java +73 -0
  59. data/src/wordcram/Fonters.java +17 -0
  60. data/src/wordcram/HsbWordColorer.java +28 -0
  61. data/src/wordcram/ImageShaper.java +91 -0
  62. data/src/wordcram/Observer.java +9 -0
  63. data/src/wordcram/PlacerHeatMap.java +134 -0
  64. data/src/wordcram/Placers.java +74 -0
  65. data/src/wordcram/PlottingWordNudger.java +38 -0
  66. data/src/wordcram/PlottingWordPlacer.java +36 -0
  67. data/src/wordcram/ProcessingWordRenderer.java +42 -0
  68. data/src/wordcram/RandomWordNudger.java +44 -0
  69. data/src/wordcram/RenderOptions.java +10 -0
  70. data/src/wordcram/ShapeBasedPlacer.java +66 -0
  71. data/src/wordcram/Sizers.java +54 -0
  72. data/src/wordcram/SketchCallbackObserver.java +70 -0
  73. data/src/wordcram/SpiralWordNudger.java +31 -0
  74. data/src/wordcram/SvgWordRenderer.java +110 -0
  75. data/src/wordcram/SwirlWordPlacer.java +25 -0
  76. data/src/wordcram/UpperLeftWordPlacer.java +27 -0
  77. data/src/wordcram/WaveWordPlacer.java +25 -0
  78. data/src/wordcram/Word.java +357 -0
  79. data/src/wordcram/WordAngler.java +20 -0
  80. data/src/wordcram/WordArray.java +18 -0
  81. data/src/wordcram/WordBag.java +31 -0
  82. data/src/wordcram/WordColorer.java +25 -0
  83. data/src/wordcram/WordCounter.java +96 -0
  84. data/src/wordcram/WordCram.java +920 -0
  85. data/src/wordcram/WordCramEngine.java +196 -0
  86. data/src/wordcram/WordFonter.java +24 -0
  87. data/src/wordcram/WordNudger.java +44 -0
  88. data/src/wordcram/WordPlacer.java +44 -0
  89. data/src/wordcram/WordRenderer.java +10 -0
  90. data/src/wordcram/WordShaper.java +78 -0
  91. data/src/wordcram/WordSizer.java +46 -0
  92. data/src/wordcram/WordSkipReason.java +42 -0
  93. data/src/wordcram/WordSorterAndScaler.java +31 -0
  94. data/src/wordcram/WordSource.java +5 -0
  95. data/src/wordcram/text/Html.java +15 -0
  96. data/src/wordcram/text/Html2Text.java +17 -0
  97. data/src/wordcram/text/Text.java +15 -0
  98. data/src/wordcram/text/TextFile.java +23 -0
  99. data/src/wordcram/text/TextSource.java +5 -0
  100. data/src/wordcram/text/WebPage.java +23 -0
  101. metadata +94 -5
  102. data/lib/cue.language.jar +0 -0
  103. data/lib/jsoup-1.7.2.jar +0 -0
  104. data/vendors/Rakefile +0 -51
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8dffd63b98ef0a8e0cd3336d91c64662079ec4e0
4
- data.tar.gz: 1b31a614bde993c6dec20492cb7bef6eab9a0851
3
+ metadata.gz: 405e8f60fe4dbae1f2e01fec268f218951e038e0
4
+ data.tar.gz: '07490b56020c882ec52d2ce621467ba880d6cc5b'
5
5
  SHA512:
6
- metadata.gz: d545addf7573a8d98ffd4db464189718f8cce0f7666575916446d9a7ea358195990e65bbf3dba97ca7f0b88282bda751dd659d0ef99cc56d7b169e01b87de95a
7
- data.tar.gz: eca3feed2429f3aaceb68a7af39d5299f975ce2b51c4ffd545587e1b16c95f1ce427b58cd32587a31c1ef4fb97d93af500ed705c88052a5945704d9bb0e76d45
6
+ metadata.gz: efce853dee8cd3966d66b897c7bc48d119a9e87dcac80f745e89e8c9f6c43b0072e07a8ff0e5cdc042e2a1e2642305200b488470053bd34593d1593ca6a983f3
7
+ data.tar.gz: ce8eaf5968499b5928358d88a1be28f841eefa8f65a2d0c355d7239d9deb55a82873ec4848489db3a3e7ac29fd931298ff9b62828e7f1f9776ce9470538f8e73
data/.gitignore CHANGED
@@ -9,5 +9,8 @@
9
9
  *.ear
10
10
  *.zip
11
11
  vendors/WordCram
12
+ target
12
13
  # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
13
14
  hs_err_pid*
15
+ MANIFEST.MF
16
+
@@ -0,0 +1,8 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <extensions>
3
+ <extension>
4
+ <groupId>io.takari.polyglot</groupId>
5
+ <artifactId>polyglot-ruby</artifactId>
6
+ <version>0.1.19</version>
7
+ </extension>
8
+ </extensions>
@@ -0,0 +1 @@
1
+ distributionUrl=https://repo1.maven.org/maven2/org/apache/maven/apache-maven/3.3.3/apache-maven-3.3.3-bin.zip
data/Rakefile CHANGED
@@ -1,11 +1,34 @@
1
- task default: [:install_jars, :gem]
1
+ require_relative 'lib/ruby_wordcram/version'
2
2
 
3
- desc 'Build gem'
3
# Writes a MANIFEST.MF file into the current working directory.
#
# The file contains three lines: the implementation title, the implementation
# version (taken from WordCram::VERSION), and a Class-Path entry naming the
# jsoup jar. Any existing MANIFEST.MF is overwritten (opened with mode 'w').
def create_manifest
  # The title is free text; it must not carry a trailing unmatched ')'.
  title = 'Implementation-Title: WordCram for JRubyArt and propane'
  version = format('Implementation-Version: %s', WordCram::VERSION)
  File.open('MANIFEST.MF', 'w') do |f|
    f.puts(title)
    f.puts(version)
    f.puts('Class-Path: jsoup-1.10.2.jar')
  end
end
12
+
13
+ task default: [:init, :compile, :install]
14
+
15
+ desc 'Create Manifest'
16
+ task :init do
17
+ create_manifest
18
+ end
19
+
20
+ desc 'Install'
21
+ task :install do
22
+ sh 'mvn dependency:copy'
23
+ sh 'mv target/WordCram.jar lib'
24
+ end
25
+
26
+ desc 'Gem'
4
27
  task :gem do
5
28
  sh 'gem build ruby_wordcram.gemspec'
6
29
  end
7
30
 
8
- desc 'Install jars'
9
- task :install_jars do
10
- sh "cd vendors && rake"
31
+ desc 'Compile'
32
+ task :compile do
33
+ sh 'mvn package'
11
34
  end
@@ -47,8 +47,9 @@ end
47
47
  ### Output
48
48
 
49
49
  <img src="{{ site.github.url }}/assets/wordcram.png" />
50
- See more examples [here][examples].
50
+ Follow links for more JRubyArt [examples][examples] or [propane examples][propane examples].
51
51
 
52
- Usage with propane is very similar, to be elaborated.
52
+ Usage with propane is essentially the same but with a class wrapper.
53
53
 
54
54
  [examples]:https://github.com/ruby-processing/JRubyArt-examples/tree/master/external_library/gem/ruby_wordcram/
55
+ [propane examples]:https://github.com/ruby-processing/propane-examples/tree/master/external_library/gem/ruby_wordcram
@@ -0,0 +1,33 @@
1
+ ---
2
+ layout: post
3
+ title: "Under the hood"
4
+ date: 2017-03-07 10:34:13
5
+ categories: wordcram update
6
+ ---
7
+
8
+ ### The required libraries
9
+
10
+ - cue.language.jar
11
+
12
+ Created by Jonathan Feinberg
13
+
14
+ cue.language is a small library of Java code and resources that provides the following basic natural-language processing capabilities:
15
+
16
+ * Tokenizing natural language text into individual words
17
+ * Tokenizing natural language text into sentences
18
+ * Tokenizing natural language text into n-grams (sequences of 2 or more words that appear next to each other in a sentence)
19
+ * Counting strings
20
+ * Detecting which script (alphabet, writing system) is required to represent a text
21
+ * Guessing what language a text is in
22
+ * Customizable "stop word" detection for a variety of languages
23
+
24
+
25
+ - WordCram.jar
26
+
27
+ Created by Dan Bernier
28
+ WordCram lets you generate word clouds in Processing. It does the heavy lifting – text analysis, collision detection – for you, so you can focus on making your word clouds as beautiful, as revealing, or as silly as you like.
29
+
30
+
31
+ - jsoup-1.10.2.jar
32
+
33
+ jsoup is a Java library for working with real-world HTML. It provides a very convenient API for extracting and manipulating data, using the best of DOM, CSS, and jquery-like methods.
data/lib/WordCram.jar CHANGED
Binary file
Binary file
@@ -1,4 +1,4 @@
1
1
  # frozen_string_literal: true
2
2
  module WordCram
3
- VERSION = '1.0.1'.freeze
3
+ VERSION = '2.0.0'.freeze
4
4
  end
data/lib/ruby_wordcram.rb CHANGED
@@ -1,8 +1,7 @@
1
1
  # frozen_string_literal: false
2
2
  if RUBY_PLATFORM == 'java'
3
3
  require 'WordCram.jar'
4
- require 'jsoup-1.7.2.jar'
5
- require 'cue.language.jar'
4
+ require 'jsoup-1.10.2.jar'
6
5
  wc = %w(WordAngler WordColorer WordCram WordFonter WordPlacer WordSkipReason)
7
6
  sh = %w(Colorers ImageShaper Observer Placers Word ShapeBasedPlacer)
8
7
  WC = wc.concat(sh).freeze
data/pom.rb ADDED
@@ -0,0 +1,53 @@
1
# Polyglot Maven (Ruby DSL) build description for the WordCram jar.
#
# NOTE(review): the 'polyglot.dump.pom' property suggests pom.xml is
# regenerated from this file on each build -- confirm before editing pom.xml
# by hand.
project 'Wordcram' do

  model_version '4.0.0'
  id 'wordcram:WordCram:2.0.0'
  packaging 'jar'

  properties(
    'source.directory' => 'src',
    'wordcram.basedir' => '${project.basedir}',
    'polyglot.dump.pom' => 'pom.xml',
    'maven.compiler.source' => '1.8',
    'project.build.sourceEncoding' => 'utf-8',
    'maven.compiler.target' => '1.8'
  )

  overrides do
    plugin( :jar,
      'archive' => {
        'manifestEntries' => {
          'Built-By' => 'monkstone'
        }
      } )
  end

  jar 'org.processing:core:3.3.0'
  jar 'org.jsoup:jsoup:1.10.2'

  build do
    default_goal 'package'
    # Reference the property so Maven interpolates it to 'src'; the bare
    # string 'source.directory' would be taken as a literal directory name.
    source_directory '${source.directory}'
    final_name 'WordCram'
    # Bundle the non-Java files under src as resources.
    resource do
      directory 'src'
      includes
      excludes '**/*.java'
    end
  end

  overrides do
    plugin :resources, '2.6'
    # Configuration for `mvn dependency:copy` (execution id 'default-cli'):
    # copies the jsoup jar into the project's lib directory.
    plugin :dependency, '2.10' do
      execute_goals( id: 'default-cli',
        artifactItems: [ { groupId: 'org.jsoup',
                           artifactId: 'jsoup',
                           version: '1.10.2',
                           type: 'jar',
                           outputDirectory: '${wordcram.basedir}/lib'
                         }
                       ]
      )
    end
  end
end
data/pom.xml ADDED
@@ -0,0 +1,87 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <!--
3
+
4
+
5
+ DO NOT MODIFY - GENERATED CODE
6
+
7
+
8
+ -->
9
+ <project xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd" xmlns="http://maven.apache.org/POM/4.0.0"
10
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
11
+ <modelVersion>4.0.0</modelVersion>
12
+ <groupId>wordcram</groupId>
13
+ <artifactId>WordCram</artifactId>
14
+ <version>2.0.0</version>
15
+ <name>Wordcram</name>
16
+ <properties>
17
+ <source.directory>src</source.directory>
18
+ <wordcram.basedir>${project.basedir}</wordcram.basedir>
19
+ <polyglot.dump.pom>pom.xml</polyglot.dump.pom>
20
+ <project.build.sourceEncoding>utf-8</project.build.sourceEncoding>
21
+ <maven.compiler.source>1.8</maven.compiler.source>
22
+ <maven.compiler.target>1.8</maven.compiler.target>
23
+ </properties>
24
+ <dependencies>
25
+ <dependency>
26
+ <groupId>org.processing</groupId>
27
+ <artifactId>core</artifactId>
28
+ <version>3.3.0</version>
29
+ </dependency>
30
+ <dependency>
31
+ <groupId>org.jsoup</groupId>
32
+ <artifactId>jsoup</artifactId>
33
+ <version>1.10.2</version>
34
+ </dependency>
35
+ </dependencies>
36
+ <build>
37
+ <sourceDirectory>source.directory</sourceDirectory>
38
+ <defaultGoal>package</defaultGoal>
39
+ <resources>
40
+ <resource>
41
+ <directory>src</directory>
42
+ <excludes>
43
+ <exclude>**/*.java</exclude>
44
+ </excludes>
45
+ </resource>
46
+ </resources>
47
+ <finalName>WordCram</finalName>
48
+ <pluginManagement>
49
+ <plugins>
50
+ <plugin>
51
+ <artifactId>maven-jar-plugin</artifactId>
52
+ <configuration>
53
+ <archive>
54
+ <manifestEntries>
55
+ <Built-By>monkstone</Built-By>
56
+ </manifestEntries>
57
+ </archive>
58
+ </configuration>
59
+ </plugin>
60
+ <plugin>
61
+ <artifactId>maven-resources-plugin</artifactId>
62
+ <version>2.6</version>
63
+ </plugin>
64
+ <plugin>
65
+ <artifactId>maven-dependency-plugin</artifactId>
66
+ <version>2.10</version>
67
+ <executions>
68
+ <execution>
69
+ <id>default-cli</id>
70
+ <configuration>
71
+ <artifactItems>
72
+ <artifactItem>
73
+ <groupId>org.jsoup</groupId>
74
+ <artifactId>jsoup</artifactId>
75
+ <version>1.10.2</version>
76
+ <type>jar</type>
77
+ <outputDirectory>${wordcram.basedir}/lib</outputDirectory>
78
+ </artifactItem>
79
+ </artifactItems>
80
+ </configuration>
81
+ </execution>
82
+ </executions>
83
+ </plugin>
84
+ </plugins>
85
+ </pluginManagement>
86
+ </build>
87
+ </project>
@@ -18,8 +18,7 @@ Gem::Specification.new do |spec|
18
18
  spec.homepage = 'http://ruby-processing.github.io/WordCram/'
19
19
  spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
20
20
  spec.files << 'lib/WordCram.jar'
21
- spec.files << 'lib/cue.language.jar'
22
- spec.files << 'lib/jsoup-1.7.2.jar'
21
+ spec.files << 'lib/jsoup-1.10.2.jar'
23
22
  spec.require_paths = ['lib']
24
23
  spec.add_development_dependency 'rake', '~> 12', '>= 12.0'
25
24
  end
@@ -0,0 +1,141 @@
1
+ /*
2
+ Copyright 2009 IBM Corp
3
+
4
+ Licensed under the Apache License, Version 2.0 (the "License");
5
+ you may not use this file except in compliance with the License.
6
+ You may obtain a copy of the License at
7
+
8
+ http://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ Unless required by applicable law or agreed to in writing, software
11
+ distributed under the License is distributed on an "AS IS" BASIS,
12
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ See the License for the specific language governing permissions and
14
+ limitations under the License.
15
+ */
16
+
17
+ package cue.lang;
18
+
19
+ import java.util.ArrayList;
20
+ import java.util.Collections;
21
+ import java.util.Comparator;
22
+ import java.util.HashMap;
23
+ import java.util.List;
24
+ import java.util.Map;
25
+ import java.util.Map.Entry;
26
+ import java.util.Set;
27
+
28
/**
 * Tallies how many times each item has been noted and reports the items
 * ordered from most to least frequent.
 *
 * @author Jonathan Feinberg &lt;jdf@us.ibm.com&gt;
 * @param <T> the type of item being counted; items are used as hash-map keys
 */
public class Counter<T> {
    // delegate, don't extend, to prevent unauthorized monkeying with internals
    private final Map<T, Integer> items = new HashMap<T, Integer>();
    private int totalItemCount = 0;

    /**
     * Orders entries by count, highest first. The counts are compared with
     * {@code compareTo} rather than subtracted, because subtraction overflows
     * for counts of large magnitude and then reports the wrong order.
     */
    private final Comparator<Entry<T, Integer>> BY_FREQ_DESC =
            new Comparator<Entry<T, Integer>>() {
                @Override
                public int compare(final Entry<T, Integer> o1,
                        final Entry<T, Integer> o2) {
                    return o2.getValue().compareTo(o1.getValue());
                }
            };

    /** Creates an empty counter. */
    public Counter() {
    }

    /**
     * Creates a counter and notes every element of {@code items} once.
     *
     * @param items the items to count
     */
    public Counter(final Iterable<T> items) {
        noteAll(items);
    }

    /**
     * Notes each element of {@code items} once.
     *
     * @param items the items to count
     */
    public final void noteAll(final Iterable<T> items) {
        for (final T t : items) {
            note(t, 1);
        }
    }

    /**
     * Notes a single occurrence of {@code item}.
     *
     * @param item the item to count
     */
    public void note(final T item) {
        note(item, 1);
    }

    /**
     * Adds {@code count} to the tally for {@code item} and to the running
     * total.
     *
     * @param item  the item to count
     * @param count the number of occurrences to add
     */
    public void note(final T item, final int count) {
        final Integer existingCount = items.get(item);
        if (existingCount != null) {
            items.put(item, existingCount + count);
        } else {
            items.put(item, count);
        }
        totalItemCount += count;
    }

    /**
     * Adds every tally held by {@code c} to this counter.
     *
     * @param c the counter whose tallies are added to this one
     */
    public void merge(final Counter<T> c) {
        for (final Entry<T, Integer> e : c.items.entrySet()) {
            note(e.getKey(), e.getValue());
        }
    }

    /**
     * @return the sum of all counts noted since creation or the last
     *         {@link #clear()}
     */
    public int getTotalItemCount() {
        return totalItemCount;
    }

    /**
     * @param n the maximum number of items to return
     * @return A list of the min(n, size()) most frequent items
     */
    public List<T> getMostFrequent(final int n) {
        final List<Entry<T, Integer>> all = getAllByFrequency();
        final int resultSize = Math.min(n, items.size());
        final List<T> result = new ArrayList<T>(resultSize);
        for (final Entry<T, Integer> e : all.subList(0, resultSize)) {
            result.add(e.getKey());
        }
        return Collections.unmodifiableList(result);
    }

    /**
     * @return an unmodifiable list of every (item, count) entry, sorted by
     *         count from highest to lowest
     */
    public List<Entry<T, Integer>> getAllByFrequency() {
        final List<Entry<T, Integer>> all = new ArrayList<Entry<T, Integer>>(
                items.entrySet());
        Collections.sort(all, BY_FREQ_DESC);
        return Collections.unmodifiableList(all);
    }

    /**
     * @param item the item to look up
     * @return the tally for {@code item}, or 0 if it has never been noted
     */
    public Integer getCount(final T item) {
        final Integer freq = items.get(item);
        if (freq == null) {
            return 0;
        }
        return freq;
    }

    /**
     * Forgets every tally and resets the running total, so that
     * {@link #getTotalItemCount()} agrees with the now-empty contents.
     */
    public void clear() {
        items.clear();
        totalItemCount = 0;
    }

    /** @return an unmodifiable view of the (item, count) entries */
    public Set<Entry<T, Integer>> entrySet() {
        return Collections.unmodifiableSet(items.entrySet());
    }

    /** @return an unmodifiable view of the counted items */
    public Set<T> keySet() {
        return Collections.unmodifiableSet(items.keySet());
    }

    /** @return every counted item, most frequent first */
    public List<T> keyList() {
        return getMostFrequent(items.size());
    }

    @Override
    public String toString() {
        return items.toString();
    }
}
@@ -0,0 +1,10 @@
1
+ package cue.lang;
2
+
3
+ import java.util.Iterator;
4
+
5
/**
 * Base class for objects that are both an {@link Iterable} and an
 * {@link Iterator} over strings, so that an instance can be used directly as
 * the target of a for-each loop.
 */
abstract class IterableText implements Iterable<String>, Iterator<String> {
    /**
     * Hands back this very object instead of creating a new iterator; every
     * call therefore returns the same instance.
     *
     * @return this object
     */
    @Override
    public Iterator<String> iterator() {
        final Iterator<String> self = this;
        return self;
    }
}