ruby_wordcram 1.0.1 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (104) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +3 -0
  3. data/.mvn/extensions.xml +8 -0
  4. data/.mvn/wrapper/maven-wrapper.properties +1 -0
  5. data/Rakefile +28 -5
  6. data/docs/_posts/2017-03-07-getting_started.md +3 -2
  7. data/docs/_posts/2017-03-07-under_the_hood.md +33 -0
  8. data/lib/WordCram.jar +0 -0
  9. data/lib/jsoup-1.10.2.jar +0 -0
  10. data/lib/ruby_wordcram/version.rb +1 -1
  11. data/lib/ruby_wordcram.rb +1 -2
  12. data/pom.rb +53 -0
  13. data/pom.xml +87 -0
  14. data/ruby_wordcram.gemspec +1 -2
  15. data/src/cue/lang/Counter.java +141 -0
  16. data/src/cue/lang/IterableText.java +10 -0
  17. data/src/cue/lang/NGramIterator.java +151 -0
  18. data/src/cue/lang/SentenceIterator.java +86 -0
  19. data/src/cue/lang/WordIterator.java +60 -0
  20. data/src/cue/lang/stop/StopWords.java +114 -0
  21. data/src/cue/lang/stop/arabic +351 -0
  22. data/src/cue/lang/stop/armenian +45 -0
  23. data/src/cue/lang/stop/catalan +219 -0
  24. data/src/cue/lang/stop/croatian +2024 -0
  25. data/src/cue/lang/stop/czech +256 -0
  26. data/src/cue/lang/stop/danish +94 -0
  27. data/src/cue/lang/stop/dutch +107 -0
  28. data/src/cue/lang/stop/english +183 -0
  29. data/src/cue/lang/stop/esperanto +180 -0
  30. data/src/cue/lang/stop/farsi +966 -0
  31. data/src/cue/lang/stop/finnish +235 -0
  32. data/src/cue/lang/stop/french +543 -0
  33. data/src/cue/lang/stop/german +231 -0
  34. data/src/cue/lang/stop/greek +637 -0
  35. data/src/cue/lang/stop/hebrew +220 -0
  36. data/src/cue/lang/stop/hindi +97 -0
  37. data/src/cue/lang/stop/hungarian +202 -0
  38. data/src/cue/lang/stop/italian +279 -0
  39. data/src/cue/lang/stop/latin +1 -0
  40. data/src/cue/lang/stop/norwegian +176 -0
  41. data/src/cue/lang/stop/polish +138 -0
  42. data/src/cue/lang/stop/portuguese +204 -0
  43. data/src/cue/lang/stop/romanian +284 -0
  44. data/src/cue/lang/stop/russian +652 -0
  45. data/src/cue/lang/stop/slovak +110 -0
  46. data/src/cue/lang/stop/slovenian +448 -0
  47. data/src/cue/lang/stop/spanish +308 -0
  48. data/src/cue/lang/stop/swedish +114 -0
  49. data/src/cue/lang/stop/turkish +117 -0
  50. data/src/cue/lang/unicode/BlockUtil.java +103 -0
  51. data/src/cue/lang/unicode/Normalizer.java +55 -0
  52. data/src/cue/lang/unicode/Normalizer6.java +32 -0
  53. data/src/license.txt +201 -0
  54. data/src/wordcram/Anglers.java +137 -0
  55. data/src/wordcram/BBTree.java +133 -0
  56. data/src/wordcram/BBTreeBuilder.java +61 -0
  57. data/src/wordcram/Colorers.java +52 -0
  58. data/src/wordcram/EngineWord.java +73 -0
  59. data/src/wordcram/Fonters.java +17 -0
  60. data/src/wordcram/HsbWordColorer.java +28 -0
  61. data/src/wordcram/ImageShaper.java +91 -0
  62. data/src/wordcram/Observer.java +9 -0
  63. data/src/wordcram/PlacerHeatMap.java +134 -0
  64. data/src/wordcram/Placers.java +74 -0
  65. data/src/wordcram/PlottingWordNudger.java +38 -0
  66. data/src/wordcram/PlottingWordPlacer.java +36 -0
  67. data/src/wordcram/ProcessingWordRenderer.java +42 -0
  68. data/src/wordcram/RandomWordNudger.java +44 -0
  69. data/src/wordcram/RenderOptions.java +10 -0
  70. data/src/wordcram/ShapeBasedPlacer.java +66 -0
  71. data/src/wordcram/Sizers.java +54 -0
  72. data/src/wordcram/SketchCallbackObserver.java +70 -0
  73. data/src/wordcram/SpiralWordNudger.java +31 -0
  74. data/src/wordcram/SvgWordRenderer.java +110 -0
  75. data/src/wordcram/SwirlWordPlacer.java +25 -0
  76. data/src/wordcram/UpperLeftWordPlacer.java +27 -0
  77. data/src/wordcram/WaveWordPlacer.java +25 -0
  78. data/src/wordcram/Word.java +357 -0
  79. data/src/wordcram/WordAngler.java +20 -0
  80. data/src/wordcram/WordArray.java +18 -0
  81. data/src/wordcram/WordBag.java +31 -0
  82. data/src/wordcram/WordColorer.java +25 -0
  83. data/src/wordcram/WordCounter.java +96 -0
  84. data/src/wordcram/WordCram.java +920 -0
  85. data/src/wordcram/WordCramEngine.java +196 -0
  86. data/src/wordcram/WordFonter.java +24 -0
  87. data/src/wordcram/WordNudger.java +44 -0
  88. data/src/wordcram/WordPlacer.java +44 -0
  89. data/src/wordcram/WordRenderer.java +10 -0
  90. data/src/wordcram/WordShaper.java +78 -0
  91. data/src/wordcram/WordSizer.java +46 -0
  92. data/src/wordcram/WordSkipReason.java +42 -0
  93. data/src/wordcram/WordSorterAndScaler.java +31 -0
  94. data/src/wordcram/WordSource.java +5 -0
  95. data/src/wordcram/text/Html.java +15 -0
  96. data/src/wordcram/text/Html2Text.java +17 -0
  97. data/src/wordcram/text/Text.java +15 -0
  98. data/src/wordcram/text/TextFile.java +23 -0
  99. data/src/wordcram/text/TextSource.java +5 -0
  100. data/src/wordcram/text/WebPage.java +23 -0
  101. metadata +94 -5
  102. data/lib/cue.language.jar +0 -0
  103. data/lib/jsoup-1.7.2.jar +0 -0
  104. data/vendors/Rakefile +0 -51
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8dffd63b98ef0a8e0cd3336d91c64662079ec4e0
4
- data.tar.gz: 1b31a614bde993c6dec20492cb7bef6eab9a0851
3
+ metadata.gz: 405e8f60fe4dbae1f2e01fec268f218951e038e0
4
+ data.tar.gz: '07490b56020c882ec52d2ce621467ba880d6cc5b'
5
5
  SHA512:
6
- metadata.gz: d545addf7573a8d98ffd4db464189718f8cce0f7666575916446d9a7ea358195990e65bbf3dba97ca7f0b88282bda751dd659d0ef99cc56d7b169e01b87de95a
7
- data.tar.gz: eca3feed2429f3aaceb68a7af39d5299f975ce2b51c4ffd545587e1b16c95f1ce427b58cd32587a31c1ef4fb97d93af500ed705c88052a5945704d9bb0e76d45
6
+ metadata.gz: efce853dee8cd3966d66b897c7bc48d119a9e87dcac80f745e89e8c9f6c43b0072e07a8ff0e5cdc042e2a1e2642305200b488470053bd34593d1593ca6a983f3
7
+ data.tar.gz: ce8eaf5968499b5928358d88a1be28f841eefa8f65a2d0c355d7239d9deb55a82873ec4848489db3a3e7ac29fd931298ff9b62828e7f1f9776ce9470538f8e73
data/.gitignore CHANGED
@@ -9,5 +9,8 @@
9
9
  *.ear
10
10
  *.zip
11
11
  vendors/WordCram
12
+ target
12
13
  # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
13
14
  hs_err_pid*
15
+ MANIFEST.MF
16
+
@@ -0,0 +1,8 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <extensions>
3
+ <extension>
4
+ <groupId>io.takari.polyglot</groupId>
5
+ <artifactId>polyglot-ruby</artifactId>
6
+ <version>0.1.19</version>
7
+ </extension>
8
+ </extensions>
@@ -0,0 +1 @@
1
+ distributionUrl=https://repo1.maven.org/maven2/org/apache/maven/apache-maven/3.3.3/apache-maven-3.3.3-bin.zip
data/Rakefile CHANGED
@@ -1,11 +1,34 @@
1
- task default: [:install_jars, :gem]
1
+ require_relative 'lib/ruby_wordcram/version'
2
2
 
3
- desc 'Build gem'
3
# Writes MANIFEST.MF into the current working directory.
#
# The file receives three lines: an Implementation-Title, an
# Implementation-Version built from WordCram::VERSION, and a Class-Path
# entry naming the jsoup jar. Any existing MANIFEST.MF is overwritten
# because the file is opened in 'w' mode.
def create_manifest
  # The title previously ended with a stray ')' that had no opening partner.
  title = 'Implementation-Title: WordCram for JRubyArt and propane'
  version = format('Implementation-Version: %s', WordCram::VERSION)
  File.open('MANIFEST.MF', 'w') do |f|
    f.puts(title)
    f.puts(version)
    f.puts('Class-Path: jsoup-1.10.2.jar')
  end
end
12
+
13
+ task default: [:init, :compile, :install]
14
+
15
+ desc 'Create Manifest'
16
+ task :init do
17
+ create_manifest
18
+ end
19
+
20
+ desc 'Install'
21
+ task :install do
22
+ sh 'mvn dependency:copy'
23
+ sh 'mv target/WordCram.jar lib'
24
+ end
25
+
26
+ desc 'Gem'
4
27
  task :gem do
5
28
  sh 'gem build ruby_wordcram.gemspec'
6
29
  end
7
30
 
8
- desc 'Install jars'
9
- task :install_jars do
10
- sh "cd vendors && rake"
31
+ desc 'Compile'
32
+ task :compile do
33
+ sh 'mvn package'
11
34
  end
@@ -47,8 +47,9 @@ end
47
47
  ### Output
48
48
 
49
49
  <img src="{{ site.github.url }}/assets/wordcram.png" />
50
- See more examples [here][examples].
50
+ Follow links for more JRubyArt [examples][examples] or [propane examples][propane examples].
51
51
 
52
- Usage with propane is very similar, to be elaborated.
52
+ Usage with propane is essentially the same but with a class wrapper.
53
53
 
54
54
  [examples]:https://github.com/ruby-processing/JRubyArt-examples/tree/master/external_library/gem/ruby_wordcram/
55
+ [propane examples]:https://github.com/ruby-processing/propane-examples/tree/master/external_library/gem/ruby_wordcram
@@ -0,0 +1,33 @@
1
+ ---
2
+ layout: post
3
+ title: "Under the hood"
4
+ date: 2017-03-07 10:34:13
5
+ categories: wordcram update
6
+ ---
7
+
8
+ ### The required libraries
9
+
10
+ - cue.language.jar
11
+
12
+ Created by Jonathan Feinberg
13
+
14
+ cue.language is a small library of Java code and resources that provides the following basic natural-language processing capabilities:
15
+
16
+ * Tokenizing natural language text into individual words
17
+ * Tokenizing natural language text into sentences
18
+ * Tokenizing natural language text into n-grams (sequences of 2 or more words that appear next to each other in a sentence)
19
+ * Counting strings
20
+ * Detecting which script (alphabet, writing system) is required to represent a text
21
+ * Guessing what language a text is in
22
+ * Customizable "stop word" detection for a variety of languages
23
+
24
+
25
+ - WordCram.jar
26
+
27
+ Created by Dan Bernier
28
+ WordCram lets you generate word clouds in Processing. It does the heavy lifting – text analysis, collision detection – for you, so you can focus on making your word clouds as beautiful, as revealing, or as silly as you like.
29
+
30
+
31
+ - jsoup-1.10.2.jar
32
+
33
+ jsoup is a Java library for working with real-world HTML. It provides a very convenient API for extracting and manipulating data, using the best of DOM, CSS, and jquery-like methods.
data/lib/WordCram.jar CHANGED
Binary file
Binary file
@@ -1,4 +1,4 @@
1
1
  # frozen_string_literal: true
2
2
  module WordCram
3
- VERSION = '1.0.1'.freeze
3
+ VERSION = '2.0.0'.freeze
4
4
  end
data/lib/ruby_wordcram.rb CHANGED
@@ -1,8 +1,7 @@
1
1
  # frozen_string_literal: false
2
2
  if RUBY_PLATFORM == 'java'
3
3
  require 'WordCram.jar'
4
- require 'jsoup-1.7.2.jar'
5
- require 'cue.language.jar'
4
+ require 'jsoup-1.10.2.jar'
6
5
  wc = %w(WordAngler WordColorer WordCram WordFonter WordPlacer WordSkipReason)
7
6
  sh = %w(Colorers ImageShaper Observer Placers Word ShapeBasedPlacer)
8
7
  WC = wc.concat(sh).freeze
data/pom.rb ADDED
@@ -0,0 +1,53 @@
1
# Polyglot Maven (Ruby DSL) project definition for the WordCram jar.
project 'Wordcram' do

  model_version '4.0.0'
  id 'wordcram:WordCram:2.0.0'
  packaging 'jar'

  properties(
    'source.directory' => 'src',
    'wordcram.basedir' => '${project.basedir}',
    'polyglot.dump.pom' => 'pom.xml',
    'maven.compiler.source' => '1.8',
    'project.build.sourceEncoding' => 'utf-8',
    'maven.compiler.target' => '1.8'
  )

  overrides do
    plugin( :jar,
            'archive' => {
              'manifestEntries' => {
                'Built-By' => 'monkstone'
              }
            } )
  end

  jar 'org.processing:core:3.3.0'
  jar 'org.jsoup:jsoup:1.10.2'

  build do
    default_goal 'package'
    # Reference the 'source.directory' property declared above. The bare
    # string 'source.directory' would be taken as a literal directory name
    # instead of resolving to 'src'.
    source_directory '${source.directory}'
    final_name 'WordCram'
    # Non-Java files under src are packaged as resources.
    resource do
      directory 'src'
      includes
      excludes '**/*.java'
    end
  end

  overrides do
    plugin :resources, '2.6'
    plugin :dependency, '2.10' do
      # Configuration for the 'default-cli' execution: place the jsoup jar
      # in this project's lib directory.
      execute_goals( id: 'default-cli',
                     artifactItems: [ { groupId: 'org.jsoup',
                                        artifactId: 'jsoup',
                                        version: '1.10.2',
                                        type: 'jar',
                                        outputDirectory: '${wordcram.basedir}/lib'
                                      }
                                    ]
                   )
    end
  end
end
data/pom.xml ADDED
@@ -0,0 +1,87 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <!--
3
+
4
+
5
+ DO NOT MODIFY - GENERATED CODE
6
+
7
+
8
+ -->
9
+ <project xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd" xmlns="http://maven.apache.org/POM/4.0.0"
10
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
11
+ <modelVersion>4.0.0</modelVersion>
12
+ <groupId>wordcram</groupId>
13
+ <artifactId>WordCram</artifactId>
14
+ <version>2.0.0</version>
15
+ <name>Wordcram</name>
16
+ <properties>
17
+ <source.directory>src</source.directory>
18
+ <wordcram.basedir>${project.basedir}</wordcram.basedir>
19
+ <polyglot.dump.pom>pom.xml</polyglot.dump.pom>
20
+ <project.build.sourceEncoding>utf-8</project.build.sourceEncoding>
21
+ <maven.compiler.source>1.8</maven.compiler.source>
22
+ <maven.compiler.target>1.8</maven.compiler.target>
23
+ </properties>
24
+ <dependencies>
25
+ <dependency>
26
+ <groupId>org.processing</groupId>
27
+ <artifactId>core</artifactId>
28
+ <version>3.3.0</version>
29
+ </dependency>
30
+ <dependency>
31
+ <groupId>org.jsoup</groupId>
32
+ <artifactId>jsoup</artifactId>
33
+ <version>1.10.2</version>
34
+ </dependency>
35
+ </dependencies>
36
+ <build>
37
+ <sourceDirectory>source.directory</sourceDirectory>
38
+ <defaultGoal>package</defaultGoal>
39
+ <resources>
40
+ <resource>
41
+ <directory>src</directory>
42
+ <excludes>
43
+ <exclude>**/*.java</exclude>
44
+ </excludes>
45
+ </resource>
46
+ </resources>
47
+ <finalName>WordCram</finalName>
48
+ <pluginManagement>
49
+ <plugins>
50
+ <plugin>
51
+ <artifactId>maven-jar-plugin</artifactId>
52
+ <configuration>
53
+ <archive>
54
+ <manifestEntries>
55
+ <Built-By>monkstone</Built-By>
56
+ </manifestEntries>
57
+ </archive>
58
+ </configuration>
59
+ </plugin>
60
+ <plugin>
61
+ <artifactId>maven-resources-plugin</artifactId>
62
+ <version>2.6</version>
63
+ </plugin>
64
+ <plugin>
65
+ <artifactId>maven-dependency-plugin</artifactId>
66
+ <version>2.10</version>
67
+ <executions>
68
+ <execution>
69
+ <id>default-cli</id>
70
+ <configuration>
71
+ <artifactItems>
72
+ <artifactItem>
73
+ <groupId>org.jsoup</groupId>
74
+ <artifactId>jsoup</artifactId>
75
+ <version>1.10.2</version>
76
+ <type>jar</type>
77
+ <outputDirectory>${wordcram.basedir}/lib</outputDirectory>
78
+ </artifactItem>
79
+ </artifactItems>
80
+ </configuration>
81
+ </execution>
82
+ </executions>
83
+ </plugin>
84
+ </plugins>
85
+ </pluginManagement>
86
+ </build>
87
+ </project>
@@ -18,8 +18,7 @@ Gem::Specification.new do |spec|
18
18
  spec.homepage = 'http://ruby-processing.github.io/WordCram/'
19
19
  spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
20
20
  spec.files << 'lib/WordCram.jar'
21
- spec.files << 'lib/cue.language.jar'
22
- spec.files << 'lib/jsoup-1.7.2.jar'
21
+ spec.files << 'lib/jsoup-1.10.2.jar'
23
22
  spec.require_paths = ['lib']
24
23
  spec.add_development_dependency 'rake', '~> 12', '>= 12.0'
25
24
  end
@@ -0,0 +1,141 @@
1
+ /*
2
+ Copyright 2009 IBM Corp
3
+
4
+ Licensed under the Apache License, Version 2.0 (the "License");
5
+ you may not use this file except in compliance with the License.
6
+ You may obtain a copy of the License at
7
+
8
+ http://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ Unless required by applicable law or agreed to in writing, software
11
+ distributed under the License is distributed on an "AS IS" BASIS,
12
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ See the License for the specific language governing permissions and
14
+ limitations under the License.
15
+ */
16
+
17
+ package cue.lang;
18
+
19
+ import java.util.ArrayList;
20
+ import java.util.Collections;
21
+ import java.util.Comparator;
22
+ import java.util.HashMap;
23
+ import java.util.List;
24
+ import java.util.Map;
25
+ import java.util.Map.Entry;
26
+ import java.util.Set;
27
+
28
+ /**
29
+ *
30
+ * @author Jonathan Feinberg <jdf@us.ibm.com>
31
+ * @param <T>
32
+ *
33
+ */
34
+ public class Counter<T> {
35
+ // delegate, don't extend, to prevent unauthorized monkeying with internals
36
+ private final Map<T, Integer> items = new HashMap<T, Integer>();
37
+ private int totalItemCount = 0;
38
+
39
+ public Counter() {
40
+ this.BY_FREQ_DESC = new Comparator<Entry<T, Integer>>() {
41
+ @Override
42
+ public int compare(final Entry<T, Integer> o1,
43
+ final Entry<T, Integer> o2) {
44
+ return o2.getValue() - o1.getValue();
45
+ }
46
+ };
47
+ }
48
+
49
+ public Counter(final Iterable<T> items) {
50
+ this.BY_FREQ_DESC = new Comparator<Entry<T, Integer>>() {
51
+ @Override
52
+ public int compare(final Entry<T, Integer> o1,
53
+ final Entry<T, Integer> o2) {
54
+ return o2.getValue() - o1.getValue();
55
+ }
56
+ };
57
+ noteAll(items);
58
+ }
59
+
60
+ public final void noteAll(final Iterable<T> items) {
61
+ for (final T t : items) {
62
+ note(t, 1);
63
+ }
64
+ }
65
+
66
+ public void note(final T item) {
67
+ note(item, 1);
68
+ }
69
+
70
+ public void note(final T item, final int count) {
71
+ final Integer existingCount = items.get(item);
72
+ if (existingCount != null) {
73
+ items.put(item, existingCount + count);
74
+ } else {
75
+ items.put(item, count);
76
+ }
77
+ totalItemCount += count;
78
+ }
79
+
80
+ public void merge(final Counter<T> c) {
81
+ for (final Entry<T, Integer> e : c.items.entrySet()) {
82
+ note(e.getKey(), e.getValue());
83
+ }
84
+ }
85
+
86
+ public int getTotalItemCount() {
87
+ return totalItemCount;
88
+ }
89
+
90
+ private final Comparator<Entry<T, Integer>> BY_FREQ_DESC;
91
+
92
+ /**
93
+ * @param n
94
+ * @return A list of the min(n, size()) most frequent items
95
+ */
96
+ public List<T> getMostFrequent(final int n) {
97
+ final List<Entry<T, Integer>> all = getAllByFrequency();
98
+ final int resultSize = Math.min(n, items.size());
99
+ final List<T> result = new ArrayList<T>(resultSize);
100
+ for (final Entry<T, Integer> e : all.subList(0, resultSize)) {
101
+ result.add(e.getKey());
102
+ }
103
+ return Collections.unmodifiableList(result);
104
+ }
105
+
106
+ public List<Entry<T, Integer>> getAllByFrequency() {
107
+ final List<Entry<T, Integer>> all = new ArrayList<Entry<T, Integer>>(
108
+ items.entrySet());
109
+ Collections.sort(all, BY_FREQ_DESC);
110
+ return Collections.unmodifiableList(all);
111
+ }
112
+
113
+ public Integer getCount(final T item) {
114
+ final Integer freq = items.get(item);
115
+ if (freq == null) {
116
+ return 0;
117
+ }
118
+ return freq;
119
+ }
120
+
121
+ public void clear() {
122
+ items.clear();
123
+ }
124
+
125
+ public Set<Entry<T, Integer>> entrySet() {
126
+ return Collections.unmodifiableSet(items.entrySet());
127
+ }
128
+
129
+ public Set<T> keySet() {
130
+ return Collections.unmodifiableSet(items.keySet());
131
+ }
132
+
133
+ public List<T> keyList() {
134
+ return getMostFrequent(items.size());
135
+ }
136
+
137
+ @Override
138
+ public String toString() {
139
+ return items.toString();
140
+ }
141
+ }
@@ -0,0 +1,10 @@
1
+ package cue.lang;
2
+
3
+ import java.util.Iterator;
4
+
5
/**
 * Base for text iterators that can be used directly in a for-each loop.
 *
 * <p>{@link #iterator()} hands back this very object, so subclasses only need
 * to supply the {@link Iterator} methods.
 */
abstract class IterableText implements Iterator<String>, Iterable<String> {

    /**
     * @return this object, which serves as its own iterator
     */
    @Override
    public Iterator<String> iterator() {
        return this;
    }
}
+ }