ruby_wordcram 1.0.1 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +3 -0
  3. data/.mvn/extensions.xml +8 -0
  4. data/.mvn/wrapper/maven-wrapper.properties +1 -0
  5. data/Rakefile +28 -5
  6. data/docs/_posts/2017-03-07-getting_started.md +3 -2
  7. data/docs/_posts/2017-03-07-under_the_hood.md +33 -0
  8. data/lib/WordCram.jar +0 -0
  9. data/lib/jsoup-1.10.2.jar +0 -0
  10. data/lib/ruby_wordcram/version.rb +1 -1
  11. data/lib/ruby_wordcram.rb +1 -2
  12. data/pom.rb +53 -0
  13. data/pom.xml +87 -0
  14. data/ruby_wordcram.gemspec +1 -2
  15. data/src/cue/lang/Counter.java +141 -0
  16. data/src/cue/lang/IterableText.java +10 -0
  17. data/src/cue/lang/NGramIterator.java +151 -0
  18. data/src/cue/lang/SentenceIterator.java +86 -0
  19. data/src/cue/lang/WordIterator.java +60 -0
  20. data/src/cue/lang/stop/StopWords.java +114 -0
  21. data/src/cue/lang/stop/arabic +351 -0
  22. data/src/cue/lang/stop/armenian +45 -0
  23. data/src/cue/lang/stop/catalan +219 -0
  24. data/src/cue/lang/stop/croatian +2024 -0
  25. data/src/cue/lang/stop/czech +256 -0
  26. data/src/cue/lang/stop/danish +94 -0
  27. data/src/cue/lang/stop/dutch +107 -0
  28. data/src/cue/lang/stop/english +183 -0
  29. data/src/cue/lang/stop/esperanto +180 -0
  30. data/src/cue/lang/stop/farsi +966 -0
  31. data/src/cue/lang/stop/finnish +235 -0
  32. data/src/cue/lang/stop/french +543 -0
  33. data/src/cue/lang/stop/german +231 -0
  34. data/src/cue/lang/stop/greek +637 -0
  35. data/src/cue/lang/stop/hebrew +220 -0
  36. data/src/cue/lang/stop/hindi +97 -0
  37. data/src/cue/lang/stop/hungarian +202 -0
  38. data/src/cue/lang/stop/italian +279 -0
  39. data/src/cue/lang/stop/latin +1 -0
  40. data/src/cue/lang/stop/norwegian +176 -0
  41. data/src/cue/lang/stop/polish +138 -0
  42. data/src/cue/lang/stop/portuguese +204 -0
  43. data/src/cue/lang/stop/romanian +284 -0
  44. data/src/cue/lang/stop/russian +652 -0
  45. data/src/cue/lang/stop/slovak +110 -0
  46. data/src/cue/lang/stop/slovenian +448 -0
  47. data/src/cue/lang/stop/spanish +308 -0
  48. data/src/cue/lang/stop/swedish +114 -0
  49. data/src/cue/lang/stop/turkish +117 -0
  50. data/src/cue/lang/unicode/BlockUtil.java +103 -0
  51. data/src/cue/lang/unicode/Normalizer.java +55 -0
  52. data/src/cue/lang/unicode/Normalizer6.java +32 -0
  53. data/src/license.txt +201 -0
  54. data/src/wordcram/Anglers.java +137 -0
  55. data/src/wordcram/BBTree.java +133 -0
  56. data/src/wordcram/BBTreeBuilder.java +61 -0
  57. data/src/wordcram/Colorers.java +52 -0
  58. data/src/wordcram/EngineWord.java +73 -0
  59. data/src/wordcram/Fonters.java +17 -0
  60. data/src/wordcram/HsbWordColorer.java +28 -0
  61. data/src/wordcram/ImageShaper.java +91 -0
  62. data/src/wordcram/Observer.java +9 -0
  63. data/src/wordcram/PlacerHeatMap.java +134 -0
  64. data/src/wordcram/Placers.java +74 -0
  65. data/src/wordcram/PlottingWordNudger.java +38 -0
  66. data/src/wordcram/PlottingWordPlacer.java +36 -0
  67. data/src/wordcram/ProcessingWordRenderer.java +42 -0
  68. data/src/wordcram/RandomWordNudger.java +44 -0
  69. data/src/wordcram/RenderOptions.java +10 -0
  70. data/src/wordcram/ShapeBasedPlacer.java +66 -0
  71. data/src/wordcram/Sizers.java +54 -0
  72. data/src/wordcram/SketchCallbackObserver.java +70 -0
  73. data/src/wordcram/SpiralWordNudger.java +31 -0
  74. data/src/wordcram/SvgWordRenderer.java +110 -0
  75. data/src/wordcram/SwirlWordPlacer.java +25 -0
  76. data/src/wordcram/UpperLeftWordPlacer.java +27 -0
  77. data/src/wordcram/WaveWordPlacer.java +25 -0
  78. data/src/wordcram/Word.java +357 -0
  79. data/src/wordcram/WordAngler.java +20 -0
  80. data/src/wordcram/WordArray.java +18 -0
  81. data/src/wordcram/WordBag.java +31 -0
  82. data/src/wordcram/WordColorer.java +25 -0
  83. data/src/wordcram/WordCounter.java +96 -0
  84. data/src/wordcram/WordCram.java +920 -0
  85. data/src/wordcram/WordCramEngine.java +196 -0
  86. data/src/wordcram/WordFonter.java +24 -0
  87. data/src/wordcram/WordNudger.java +44 -0
  88. data/src/wordcram/WordPlacer.java +44 -0
  89. data/src/wordcram/WordRenderer.java +10 -0
  90. data/src/wordcram/WordShaper.java +78 -0
  91. data/src/wordcram/WordSizer.java +46 -0
  92. data/src/wordcram/WordSkipReason.java +42 -0
  93. data/src/wordcram/WordSorterAndScaler.java +31 -0
  94. data/src/wordcram/WordSource.java +5 -0
  95. data/src/wordcram/text/Html.java +15 -0
  96. data/src/wordcram/text/Html2Text.java +17 -0
  97. data/src/wordcram/text/Text.java +15 -0
  98. data/src/wordcram/text/TextFile.java +23 -0
  99. data/src/wordcram/text/TextSource.java +5 -0
  100. data/src/wordcram/text/WebPage.java +23 -0
  101. metadata +94 -5
  102. data/lib/cue.language.jar +0 -0
  103. data/lib/jsoup-1.7.2.jar +0 -0
  104. data/vendors/Rakefile +0 -51
@@ -0,0 +1,42 @@
1
+ package wordcram;
2
+
3
+ public enum WordSkipReason {
4
+
5
+ /**
6
+ * Skip Reason: the Word was skipped because the WordCram had already drawn
7
+ * the {@link WordCram#maxNumberOfWordsToDraw(int)}.
8
+ * It's really about the Word's rank, its position in the list once the
9
+ * words are sorted by weight: if its rank is greater than the value passed
10
+ * to maxNumberOfWordsToDraw(), then it'll be skipped, and this will be the
11
+ * reason code.
12
+ */
13
+ WAS_OVER_MAX_NUMBER_OF_WORDS("we already reached the maxNumberOfWordsToDraw threshold"),
14
+
15
+ /**
16
+ * Skip Reason: the Word's shape was smaller than
17
+ * {@link WordCram#minShapeSize(int)}.
18
+ * WordCram will only render words so small, for performance reasons. You
19
+ * can set the minimum Word shape size via
20
+ * {@link WordCram#minShapeSize(int)}.
21
+ */
22
+ SHAPE_WAS_TOO_SMALL("it was below the minShapeSize threshold"),
23
+
24
+ /**
25
+ * Skip Reason: WordCram tried placing the Word, but it couldn't find a
26
+ * clear spot. The {@link WordNudger} nudged it around a bunch (according
27
+ * to {@link WordCram#maxAttemptsToPlaceWord(int)}, if it was set), but
28
+ * there was just no room.
29
+ */
30
+ NO_SPACE("there wasn't enough space near where you wanted it placed");
31
+
32
+ String reason;
33
+
34
+ WordSkipReason(String reason) {
35
+ this.reason = reason;
36
+ }
37
+
38
+ @Override
39
+ public String toString() {
40
+ return reason;
41
+ }
42
+ }
@@ -0,0 +1,31 @@
1
+ package wordcram;
2
+
3
+ import java.util.Arrays;
4
+
5
+ class WordSorterAndScaler {
6
+
7
+ public Word[] sortAndScale(Word[] rawWords) {
8
+ if (rawWords.length == 0) {
9
+ return rawWords;
10
+ }
11
+
12
+ Word[] words = copy(rawWords);
13
+ Arrays.sort(words);
14
+ float maxWeight = words[0].weight;
15
+
16
+ for (Word word : words) {
17
+ word.weight = word.weight / maxWeight;
18
+ }
19
+
20
+ return words;
21
+ }
22
+
23
+ private Word[] copy(Word[] rawWords) {
24
+
25
+ // was Arrays.copyOf(rawWords, rawWords.length); - removed for Java 1.5 compatibility.
26
+
27
+ Word[] copy = new Word[rawWords.length];
28
+ System.arraycopy(rawWords, 0, copy, 0, copy.length);
29
+ return copy;
30
+ }
31
+ }
@@ -0,0 +1,5 @@
1
+ package wordcram;
2
+
3
+ public interface WordSource {
4
+ public Word[] getWords();
5
+ }
@@ -0,0 +1,15 @@
1
+ package wordcram.text;
2
+
3
+ public class Html implements TextSource {
4
+
5
+ private final String src;
6
+
7
+ public Html(String htmlSrc) {
8
+ src = htmlSrc;
9
+ }
10
+
11
+ @Override
12
+ public String getText() {
13
+ return new Html2Text().text(src, null);
14
+ }
15
+ }
@@ -0,0 +1,17 @@
1
+ package wordcram.text;
2
+
3
+ import org.jsoup.Jsoup;
4
+ import org.jsoup.nodes.Document;
5
+
6
+ class Html2Text {
7
+ public String text(String html, String cssSelector) {
8
+ Document doc = Jsoup.parse(html);
9
+
10
+ if (cssSelector == null) {
11
+ return doc.text();
12
+ }
13
+ else {
14
+ return doc.select(cssSelector).text();
15
+ }
16
+ }
17
+ }
@@ -0,0 +1,15 @@
1
+ package wordcram.text;
2
+
3
+ public class Text implements TextSource {
4
+
5
+ private final String text;
6
+
7
+ public Text(String _text) {
8
+ text = _text;
9
+ }
10
+
11
+ @Override
12
+ public String getText() {
13
+ return text;
14
+ }
15
+ }
@@ -0,0 +1,23 @@
1
+ package wordcram.text;
2
+
3
+ import processing.core.PApplet;
4
+
5
+ public class TextFile implements TextSource {
6
+
7
+ private final String path;
8
+
9
+ // TODO if we move all .text.* classes into WordCram, we can kill this, and
10
+ // use pkg-local methods for setting the parent...
11
+ private final PApplet parent;
12
+
13
+ public TextFile(String path, PApplet parent) {
14
+ this.path = path;
15
+ this.parent = parent;
16
+ }
17
+
18
+ @Override
19
+ public String getText() {
20
+ return PApplet.join(parent.loadStrings(path), ' ');
21
+ }
22
+
23
+ }
@@ -0,0 +1,5 @@
1
+ package wordcram.text;
2
+
3
+ public interface TextSource {
4
+ public String getText();
5
+ }
@@ -0,0 +1,23 @@
1
+ package wordcram.text;
2
+
3
+ import processing.core.PApplet;
4
+
5
+ public class WebPage implements TextSource {
6
+
7
+ private final String url;
8
+ private final String cssSelector;
9
+ private final PApplet parent;
10
+
11
+ public WebPage(String url, String cssSelector, PApplet parent) {
12
+ this.url = url;
13
+ this.cssSelector = cssSelector;
14
+ this.parent = parent;
15
+ }
16
+
17
+ @Override
18
+ public String getText() {
19
+ String html = PApplet.join(parent.loadStrings(url), ' ');
20
+ return new Html2Text().text(html, cssSelector);
21
+ }
22
+
23
+ }
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ruby_wordcram
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.1
4
+ version: 2.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dan Bernier
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2017-03-08 00:00:00.000000000 Z
13
+ date: 2017-03-11 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: rake
@@ -42,6 +42,8 @@ extra_rdoc_files:
42
42
  - LICENSE
43
43
  files:
44
44
  - ".gitignore"
45
+ - ".mvn/extensions.xml"
46
+ - ".mvn/wrapper/maven-wrapper.properties"
45
47
  - CHANGELOG.md
46
48
  - LICENSE
47
49
  - README.md
@@ -59,6 +61,7 @@ files:
59
61
  - docs/_layouts/page.html
60
62
  - docs/_layouts/post.html
61
63
  - docs/_posts/2017-03-07-getting_started.md
64
+ - docs/_posts/2017-03-07-under_the_hood.md
62
65
  - docs/_sass/_base.scss
63
66
  - docs/_sass/_layout.scss
64
67
  - docs/_sass/_syntax-highlighting.scss
@@ -70,12 +73,98 @@ files:
70
73
  - example/data/MINYN___.TTF
71
74
  - example/test.rb
72
75
  - lib/WordCram.jar
73
- - lib/cue.language.jar
74
- - lib/jsoup-1.7.2.jar
76
+ - lib/jsoup-1.10.2.jar
75
77
  - lib/ruby_wordcram.rb
76
78
  - lib/ruby_wordcram/version.rb
79
+ - pom.rb
80
+ - pom.xml
77
81
  - ruby_wordcram.gemspec
78
- - vendors/Rakefile
82
+ - src/cue/lang/Counter.java
83
+ - src/cue/lang/IterableText.java
84
+ - src/cue/lang/NGramIterator.java
85
+ - src/cue/lang/SentenceIterator.java
86
+ - src/cue/lang/WordIterator.java
87
+ - src/cue/lang/stop/StopWords.java
88
+ - src/cue/lang/stop/arabic
89
+ - src/cue/lang/stop/armenian
90
+ - src/cue/lang/stop/catalan
91
+ - src/cue/lang/stop/croatian
92
+ - src/cue/lang/stop/czech
93
+ - src/cue/lang/stop/danish
94
+ - src/cue/lang/stop/dutch
95
+ - src/cue/lang/stop/english
96
+ - src/cue/lang/stop/esperanto
97
+ - src/cue/lang/stop/farsi
98
+ - src/cue/lang/stop/finnish
99
+ - src/cue/lang/stop/french
100
+ - src/cue/lang/stop/german
101
+ - src/cue/lang/stop/greek
102
+ - src/cue/lang/stop/hebrew
103
+ - src/cue/lang/stop/hindi
104
+ - src/cue/lang/stop/hungarian
105
+ - src/cue/lang/stop/italian
106
+ - src/cue/lang/stop/latin
107
+ - src/cue/lang/stop/norwegian
108
+ - src/cue/lang/stop/polish
109
+ - src/cue/lang/stop/portuguese
110
+ - src/cue/lang/stop/romanian
111
+ - src/cue/lang/stop/russian
112
+ - src/cue/lang/stop/slovak
113
+ - src/cue/lang/stop/slovenian
114
+ - src/cue/lang/stop/spanish
115
+ - src/cue/lang/stop/swedish
116
+ - src/cue/lang/stop/turkish
117
+ - src/cue/lang/unicode/BlockUtil.java
118
+ - src/cue/lang/unicode/Normalizer.java
119
+ - src/cue/lang/unicode/Normalizer6.java
120
+ - src/license.txt
121
+ - src/wordcram/Anglers.java
122
+ - src/wordcram/BBTree.java
123
+ - src/wordcram/BBTreeBuilder.java
124
+ - src/wordcram/Colorers.java
125
+ - src/wordcram/EngineWord.java
126
+ - src/wordcram/Fonters.java
127
+ - src/wordcram/HsbWordColorer.java
128
+ - src/wordcram/ImageShaper.java
129
+ - src/wordcram/Observer.java
130
+ - src/wordcram/PlacerHeatMap.java
131
+ - src/wordcram/Placers.java
132
+ - src/wordcram/PlottingWordNudger.java
133
+ - src/wordcram/PlottingWordPlacer.java
134
+ - src/wordcram/ProcessingWordRenderer.java
135
+ - src/wordcram/RandomWordNudger.java
136
+ - src/wordcram/RenderOptions.java
137
+ - src/wordcram/ShapeBasedPlacer.java
138
+ - src/wordcram/Sizers.java
139
+ - src/wordcram/SketchCallbackObserver.java
140
+ - src/wordcram/SpiralWordNudger.java
141
+ - src/wordcram/SvgWordRenderer.java
142
+ - src/wordcram/SwirlWordPlacer.java
143
+ - src/wordcram/UpperLeftWordPlacer.java
144
+ - src/wordcram/WaveWordPlacer.java
145
+ - src/wordcram/Word.java
146
+ - src/wordcram/WordAngler.java
147
+ - src/wordcram/WordArray.java
148
+ - src/wordcram/WordBag.java
149
+ - src/wordcram/WordColorer.java
150
+ - src/wordcram/WordCounter.java
151
+ - src/wordcram/WordCram.java
152
+ - src/wordcram/WordCramEngine.java
153
+ - src/wordcram/WordFonter.java
154
+ - src/wordcram/WordNudger.java
155
+ - src/wordcram/WordPlacer.java
156
+ - src/wordcram/WordRenderer.java
157
+ - src/wordcram/WordShaper.java
158
+ - src/wordcram/WordSizer.java
159
+ - src/wordcram/WordSkipReason.java
160
+ - src/wordcram/WordSorterAndScaler.java
161
+ - src/wordcram/WordSource.java
162
+ - src/wordcram/text/Html.java
163
+ - src/wordcram/text/Html2Text.java
164
+ - src/wordcram/text/Text.java
165
+ - src/wordcram/text/TextFile.java
166
+ - src/wordcram/text/TextSource.java
167
+ - src/wordcram/text/WebPage.java
79
168
  homepage: http://ruby-processing.github.io/WordCram/
80
169
  licenses:
81
170
  - Apache-2.0
data/lib/cue.language.jar DELETED
Binary file
data/lib/jsoup-1.7.2.jar DELETED
Binary file
data/vendors/Rakefile DELETED
@@ -1,51 +0,0 @@
1
- require 'rake/clean'
2
-
3
- WARNING = <<-EOS.freeze
4
- WARNING: you may not have wget installed
5
-
6
- EOS
7
-
8
- WORDCRAM_VERSION = '1.0.0'
9
- CLOBBER.include("wordcram.#{WORDCRAM_VERSION}.zip")
10
-
11
- desc "download, and copy to wordcram/lib"
12
- task :default => [:download, :unpack_library, :copy_jars]
13
-
14
- desc "download wordcram upstream sources"
15
- task :download => ["wordcram.#{WORDCRAM_VERSION}.zip"]
16
-
17
- file "wordcram.#{WORDCRAM_VERSION}.zip" do
18
- begin
19
- sh "wget http://wordcram.org/wordcram.#{WORDCRAM_VERSION}.zip"
20
- rescue
21
- warn(WARNING)
22
- end
23
- check_sha256("wordcram.#{WORDCRAM_VERSION}.zip", "d6b936db3628806099eba3f309fad81dae7f3db5e2a2846742c959b03eb0d25f")
24
- end
25
-
26
- desc "unpack wordcram library"
27
- task :unpack_library do
28
- sh "unzip wordcram.#{WORDCRAM_VERSION}.zip"
29
- end
30
-
31
- directory "../lib"
32
-
33
- desc "copy jars"
34
- task :copy_jars => ["../lib"] do
35
- sh "cp -v WordCram/library/WordCram.jar ../lib/WordCram.jar"
36
- sh "cp -v WordCram/library/cue.language.jar ../lib/cue.language.jar"
37
- sh "cp -v WordCram/library/jsoup-1.7.2.jar ../lib/jsoup-1.7.2.jar"
38
- end
39
-
40
- def check_sha256(filename, expected_hash)
41
- require "digest"
42
- sha256 = Digest::SHA256.new
43
- File.open(filename, "r") do |f|
44
- while buf = f.read(4096)
45
- sha256.update(buf)
46
- end
47
- end
48
- if sha256.hexdigest != expected_hash
49
- raise "bad sha256 checksum for #{filename} (expected #{expected_hash} got #{sha256.hexdigest})"
50
- end
51
- end