charlock_holmes 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,254 @@
1
+ #lang racket/base
2
+
3
+ (require (for-syntax syntax/parse racket/syntax racket)
4
+ ffi/unsafe racket/function racket/string
5
+ "start.rkt" "c.rkt")
6
+
7
+ (struct jtype (signature tag predicate ctype racket->java java->racket))
8
+ (struct jtype/object jtype (class))
9
+ (struct jtype/vector jtype/object (element))
10
+ (struct jvector (cpointer type length))
11
+ (struct jprocedure (args return proc))
12
+
13
+ (define ((single-compose f1 f2) e) (f1 (f2 e)))
14
+
15
+ (define (make-jtype obj racket->java java->racket)
16
+ (let ([composed-racket->java (single-compose (jtype-racket->java obj) racket->java)]
17
+ [composed-java->racket (single-compose java->racket (jtype-java->racket obj))])
18
+ ; due to limitation in racket's struct-copy
19
+ (cond
20
+ [(jtype/vector? obj)
21
+ (struct-copy jtype/vector obj
22
+ [racket->java #:parent jtype composed-racket->java]
23
+ [java->racket #:parent jtype composed-java->racket])]
24
+ [(jtype/object? obj)
25
+ (struct-copy jtype/object obj
26
+ [racket->java #:parent jtype composed-racket->java]
27
+ [java->racket #:parent jtype composed-java->racket])]
28
+ [else
29
+ (struct-copy jtype obj
30
+ [racket->java composed-racket->java]
31
+ [java->racket composed-java->racket])])))
32
+
33
+ (define (jtype->ctype obj)
34
+ (make-ctype (jtype-ctype obj) (jtype-racket->java obj) (jtype-java->racket obj)))
35
+
36
+ ; --- signature makers ---
37
+ (define (make-class-signature c) (string-append "L" c ";"))
38
+ (define (make-vector-signature s) (string-append "[" s))
39
+ (define (make-signature args return)
40
+ (let ([args-signature (string-append* (map jtype-signature args))]
41
+ [return-signature (jtype-signature return)])
42
+ (string-append "(" args-signature ")" return-signature)))
43
+
44
+ ; --- predicates for java types on racket ---
45
+ (require (only-in web-server/dispatch/extend make-coerce-safe?) srfi/26/cut)
46
+
47
+ (define jboolean? boolean?)
48
+ (define jbyte? byte?)
49
+ (define jchar? char?)
50
+ (define jshort? (make-coerce-safe? (cut < -32768 <> 32767)))
51
+ (define jint? (make-coerce-safe? (cut < -2147483648 <> 2147483647)))
52
+ (define jlong? (make-coerce-safe? (cut < -9223372036854775808 <> 9223372036854775807)))
53
+ (define jfloat? single-flonum?)
54
+ (define jdouble? flonum?)
55
+ (define jstring? string?)
56
+ (define ((make-jobject-predicate clss) o) (instance-of? o clss))
57
+ (define ((make-jlist-predicate element?) o) (andmap element? o))
58
+
59
+ ; --- java types ---
60
+ (define _jboolean (jtype "Z" 'boolean jboolean? __jboolean #f #f))
61
+ (define _jbyte (jtype "B" 'byte jbyte? __jbyte #f #f))
62
+ (define _jchar (jtype "C" 'char jchar? __jchar char->integer integer->char))
63
+ (define _jshort (jtype "S" 'short jshort? __jshort #f #f))
64
+ (define _jint (jtype "I" 'int jint? __jint #f #f))
65
+ (define _jlong (jtype "J" 'long jlong? __jlong #f #f))
66
+ (define _jfloat (jtype "F" 'float jfloat? __jfloat #f #f))
67
+ (define _jdouble (jtype "D" 'double jdouble? __jdouble #f #f))
68
+ (define _jvoid (jtype "V" 'void #f __jvoid #f #f))
69
+ ; hack for _jobject and _jlist so that they dual as a jtype and function
70
+ (define _jobject
71
+ ((λ ()
72
+ (struct _jobject jtype/object ()
73
+ #:property prop:procedure
74
+ (λ (self class-name [racket->java #f] [java->racket #f] [predicate #f])
75
+ (let ([class-id (find-class class-name)])
76
+ (struct-copy jtype/object self
77
+ [signature #:parent jtype (make-class-signature class-name)]
78
+ [predicate #:parent jtype (or predicate (make-jobject-predicate class-id))]
79
+ [racket->java #:parent jtype racket->java]
80
+ [java->racket #:parent jtype java->racket]
81
+ [class class-id]))))
82
+ (let ([class-id (find-class "Ljava/lang/Object;")])
83
+ (_jobject "Ljava/lang/Object;" 'object (make-jobject-predicate class-id)
84
+ __jobject #f #f class-id)))))
85
+ (define _jstring (_jobject "java/lang/String" new-string get-string jstring?))
86
+ (define _jlist
87
+ ((λ ()
88
+ (struct _jlist jtype/vector ()
89
+ #:property prop:procedure
90
+ (λ (self element)
91
+ (define-values (make-array array-ref array-set!) (tag->array-info (jtype-tag element)))
92
+ (when (jtype/object? element)
93
+ (let ([clss (jtype/object-class element)])
94
+ (set! make-array (λ (n) (new-object-array n clss #f)))))
95
+ (let* ([signature (make-vector-signature (jtype-signature element))]
96
+ [element-racket->java (or (jtype-racket->java element) identity)]
97
+ [element-java->racket (or (jtype-java->racket element) identity)]
98
+ [element? (or (jtype-predicate element) (λ (_) #t))])
99
+ (struct-copy jtype/vector self
100
+ [signature #:parent jtype signature]
101
+ [predicate #:parent jtype (make-jlist-predicate element?)]
102
+ [ctype #:parent jtype __jobject]
103
+ [racket->java #:parent jtype
104
+ (λ (c)
105
+ (let ([array (make-array (length c))])
106
+ (for ([e (in-list c)] [i (in-naturals)])
107
+ (array-set! array i (element-racket->java e)))
108
+ array))]
109
+ [java->racket #:parent jtype
110
+ (λ (c)
111
+ (for/list ([i (in-range (get-array-length c))])
112
+ (element-java->racket (array-ref c i))))]
113
+ [class #:parent jtype/object (find-class signature)]
114
+ [element element]))))
115
+ (let ([class-id (find-class "[Ljava/lang/Object;")]
116
+ [element-class-id (jtype/object-class _jobject)])
117
+ (_jlist "[Ljava/lang/Object;" 'object (make-jobject-predicate element-class-id) __jobject
118
+ (λ (c)
119
+ (let ([array (new-object-array (length c) element-class-id #f)])
120
+ (for ([e (in-list c)]
121
+ [i (in-naturals)])
122
+ (set-object-array-element array i e))
123
+ array))
124
+ (λ (c)
125
+ (for/list ([i (in-range (get-array-length c))])
126
+ (get-object-array-element c i)))
127
+ class-id
128
+ _jobject)))))
129
+ (define-syntax (_jmethod stx)
130
+ (define-syntax-class type #:literals (->)
131
+ (pattern (~and x (~not (~or (~literal ...) ->)))))
132
+ (syntax-parse stx #:literals (->)
133
+ [(_ arg:type ... (~optional (~seq farg:type (~literal ...))) (~optional (~seq -> return*)))
134
+ (with-syntax* ([(arg* ...) (generate-temporaries #'(arg ...))]
135
+ [(larg ... . marg) #`(arg* ... #,@(if (attribute farg) #'arg-rest #`()))]
136
+ [(aarg ...) #`(arg* ... #,@(if (attribute farg) #'(arg-rest) #`()))]
137
+ [return (if (attribute return*) #'return* #'_jvoid)])
138
+ #`(let* ([args (list arg ... #,@(if (attribute farg) #`((_jlist farg)) #`()))])
139
+ (jprocedure args return
140
+ (λ (type jnienv clss method func)
141
+ (case type
142
+ [(constructor) (λ (larg ... . marg) (func jnienv clss method aarg ...))]
143
+ [(static-method) (λ (larg ... . marg) (func jnienv clss method aarg ...))]
144
+ [(method) (λ (o larg ... . marg) (func jnienv o method aarg ...))]
145
+ [else (error '_jmethod "invalid type provided")])))))]))
146
+ ; dynamic and slower version of _jmethod
147
+ (define (_jprocedure args return #:repeat-last-arg? [repeat-last-arg? #f])
148
+ (define (nest-at lst i)
149
+ (if (null? lst) (list null)
150
+ (let loop ([lst lst] [i i])
151
+ (cond [(null? lst) null]
152
+ [(zero? i) (list lst)]
153
+ [else (cons (car lst) (loop (cdr lst) (sub1 i)))]))))
154
+ (jprocedure args return
155
+ (if repeat-last-arg?
156
+ (let ([repeat-position (sub1 (length args))])
157
+ (λ (type jnienv clss method func)
158
+ (case type
159
+ [(constructor) (λ larg (apply func jnienv clss method (nest-at larg repeat-position)))]
160
+ [(static-method) (λ larg (apply func jnienv clss method (nest-at larg repeat-position)))]
161
+ [(method) (λ (o . larg) (apply func jnienv o method (nest-at larg repeat-position)))])))
162
+ (λ (type jnienv clss method func)
163
+ (case type
164
+ [(constructor) (λ larg (apply func jnienv clss method larg))]
165
+ [(static-method) (λ larg (apply func jnienv clss method larg))]
166
+ [(method) (λ (o . larg) (apply func jnienv o method larg))]
167
+ [else (error '_jprocedure "invalid type provided")])))))
168
+ ; get-jmethod/get-jconstructor pass the following arguments (type jnienv class method func)
169
+ ; to a function created by _jmethod or _jprocedure
170
+ ; according to the type the function returns one of the following functions
171
+ ; | constructor (λ (args ...) ; doesn't need to take in an object and the class is static
172
+ ; | static-method (λ (args ...) ; same reasoning as above
173
+ ; | method (λ (object args ...)
174
+
175
+
176
+ ; --- interfacing with java methods ---
177
+ (define (get-jconstructor class-id type)
178
+ (let* ([args (jprocedure-args type)]
179
+ [return (jprocedure-return type)]
180
+ [proc (jprocedure-proc type)]
181
+ [signature (make-signature args return)]
182
+ [method-id (get-method-id class-id "<init>" signature)]
183
+ [ffi-func (get-jrffi-obj "new-object"
184
+ (_cprocedure (list* __jnienv __jclass __jmethodID (map jtype->ctype args))
185
+ __jobject))])
186
+ (proc 'constructor current-jnienv class-id method-id ffi-func)))
187
+
188
+ (define (get-jmethod class-id method-name type #:static? [static? #f])
189
+ (let* ([args (jprocedure-args type)]
190
+ [return (jprocedure-return type)]
191
+ [proc (jprocedure-proc type)]
192
+ [signature (make-signature args return)]
193
+ [method-id (get-method-id class-id method-name signature #:static? static?)]
194
+ [type (if static? 'static-method 'method)]
195
+ [ffi-func (get-jrffi-obj
196
+ (format "call-~a~a-method" (if static? "static-" "") (jtype-tag return))
197
+ (_cprocedure (append (list __jnienv (if static? __jclass __jobject)
198
+ __jmethodID) (map jtype->ctype args))
199
+ (jtype->ctype return)))])
200
+ (proc type current-jnienv class-id method-id ffi-func)))
201
+
202
+
203
+ ; --- interfacing with java fields ---
204
+ (define (get-jaccessor class-id field-name type #:static? [static? #f])
205
+ (let* ([signature (jtype-signature class-id field-name (jtype-signature type))]
206
+ [field-id (get-field-id class-id field-name signature #:static? static?)]
207
+ [ffi-func (get-jrffi-obj
208
+ (format "get-~a~a-field" (if static? "static-" "") (jtype-tag type))
209
+ (_cprocedure (list __jnienv (if static? __jclass __jobject) __jfieldID) type))])
210
+ (if static? (λ () (ffi-func current-jnienv class-id field-id))
211
+ (λ (obj) (ffi-func current-jnienv obj field-id)))))
212
+
213
+ (define (get-jmutator class-id field-name type #:static? [static? #f])
214
+ (let* ([signature (jtype-signature class-id field-name (jtype-signature type))]
215
+ [field-id (get-field-id class-id field-name signature #:static? static?)]
216
+ [ffi-func (get-jrffi-obj
217
+ (format "set-~a~a-field" (if static? "static-" "") (jtype-tag type))
218
+ (_cprocedure (list __jnienv (if static? __jclass __jobject) __jfieldID type) type))])
219
+ (if static? (λ (new-value) (ffi-func current-jnienv class-id field-id new-value))
220
+ (λ (obj new-value) (ffi-func current-jnienv obj field-id new-value)))))
221
+
222
+ (define (get-jparameter class-id field-name type #:static? [static? #f])
223
+ (let* ([accessor (get-jaccessor class-id field-name type #:static? static?)]
224
+ [mutator (get-jmutator class-id field-name type #:static? static?)])
225
+ (if static?
226
+ (case-lambda
227
+ [() (accessor)]
228
+ [(new-value) (mutator new-value)])
229
+ (case-lambda
230
+ [(obj) (accessor obj)]
231
+ [(obj new-value) (mutator obj new-value)]))))
232
+
233
+
234
+
235
+ (provide _jboolean _jbyte _jchar _jshort _jint _jlong _jfloat _jdouble _jvoid
236
+ _jobject _jstring _jlist)
237
+
238
+ (provide get-jconstructor get-jmethod get-jparameter get-jmutator get-jaccessor)
239
+
240
+ ;(provide instance-of? (rename-out [find-class find-class]) get-method-id get-field-id)
241
+
242
+
243
+ (provide (all-defined-out) : -> current-jnienv)
244
+
245
+
246
+
247
+
248
+
249
+
250
+
251
+
252
+
253
+
254
+
@@ -0,0 +1,131 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+ #
4
+ # @author: starenka
5
+ # @email: 'moc]tod[liamg].T.E[0aknerats'[::-1]
6
+
7
+ import warnings, hashlib, simplejson, string
8
+ from os.path import dirname, abspath
9
+
10
+ from flask import Flask, render_template, request
11
+ from flaskext.sqlalchemy import SQLAlchemy
12
+ try:
13
+ from sqlalchemy.exceptions import IntegrityError
14
+ except ImportError:
15
+ from sqlalchemy.exc import IntegrityError
16
+
17
+ #Hey monkey patcher! NLTK's NgramModel is not serializable w/ pickle.HIGHEST_PROTOCOL (2)
18
+ from werkzeug.contrib import cache
19
+ cache.HIGHEST_PROTOCOL = 1
20
+ from werkzeug.contrib.cache import SimpleCache
21
+
22
+ from BeautifulSoup import BeautifulSoup
23
+ import nltk
24
+
25
+ PUNCT = list(unicode(string.punctuation))
26
+
27
+ app = Flask(__name__)
28
+ app.config.from_object('settings')
29
+ cache = SimpleCache()
30
+
31
+ app.config['SQLALCHEMY_DATABASE_URI'] = 'sqlite:///%s/db.sqlite3'%abspath(dirname(__file__))
32
+ db = SQLAlchemy(app)
33
+
34
+ class Sample(db.Model):
35
+ id = db.Column(db.Integer, primary_key=True)
36
+ url = db.Column(db.String(80), unique=True)
37
+ text = db.Column(db.String())
38
+ enabled = db.Column(db.Boolean())
39
+
40
+ def __unicode__(self):
41
+ str = unicode(BeautifulSoup(self.text,convertEntities=BeautifulSoup.HTML_ENTITIES))
42
+ return nltk.clean_html(str)
43
+
44
+ @classmethod
45
+ def get_all(self):
46
+ cached = cache.get('samples')
47
+ if cached is None:
48
+ cached = self.query.filter_by(enabled=True).all()
49
+ cache.set('samples', cached, timeout=app.config['CACHE_MINUTES'] * 60)
50
+ return cached
51
+
52
+ class Output(db.Model):
53
+ id = db.Column(db.Integer, primary_key=True)
54
+ hash = db.Column(db.String(128),unique=True)
55
+ text = db.Column(db.String())
56
+ params = db.Column(db.String(100))
57
+
58
+ def __init__(self,text,**params):
59
+ self.hash = hashlib.sha512(text.encode('utf8')).hexdigest()
60
+ self.text = text
61
+ self.params = simplejson.dumps(params)
62
+
63
+ @app.context_processor
64
+ def base_context():
65
+ return dict(settings=app.config,
66
+ hits = Output.query.count() + app.config['INIT_HITS']
67
+ )
68
+
69
+ @app.errorhandler(404)
70
+ def page_not_found(error):
71
+ return render_template('404.html',title=u"To tady nemáme!"), 404
72
+
73
+ @app.route('/faq')
74
+ def faq():
75
+ return render_template('faq.html',title=u"Často kladené dotazy",samples=Sample.get_all())
76
+
77
+ @app.route('/permalink/<hash>')
78
+ def permalink(hash):
79
+ one = Output.query.filter_by(hash=hash).first_or_404()
80
+ return render_template('generator.html', title=u"Henrykuj!",
81
+ text=one.text, hash=one.hash,
82
+ **simplejson.loads(one.params)
83
+ )
84
+
85
+ @app.route('/')
86
+ def index():
87
+ bigrams = request.args.get('bigrams',False)
88
+ try:
89
+ words = int(request.args.get('words',app.config['WORDS']))
90
+ if words > app.config['MAX_WORDS']:
91
+ words = app.config['MAX_WORDS']
92
+ except ValueError:
93
+ words = app.config['WORDS']
94
+
95
+ out = _generate(words,bigrams)
96
+ output = Output(out,words=words,bigrams=bool(bigrams))
97
+ try:
98
+ db.session.add(output)
99
+ db.session.commit()
100
+ except IntegrityError:
101
+ pass
102
+
103
+ return render_template('generator.html', title=u"Henrykuj!",
104
+ text=out, hash=output.hash,
105
+ words=words, bigrams=bigrams
106
+ )
107
+
108
+ def _get_ngram_model(bigrams):
109
+ #NLTK produces a LOT of warnings - don't mess with my error log
110
+ warnings.simplefilter("ignore")
111
+ cached = cache.get('ngram_model')
112
+ if cached is None:
113
+ samples = Sample.get_all()
114
+ if samples:
115
+ text = [unicode(s) for s in samples]
116
+ tokenizer = nltk.tokenize.WordPunctTokenizer()
117
+ tokenized = tokenizer.tokenize(' '.join(text))
118
+ cached = nltk.NgramModel(3-int(bool(bigrams)), tokenized)
119
+ cache.set('ngram_model', cached, timeout=app.config['CACHE_MINUTES'] * 60)
120
+ return cached
121
+
122
+ def _generate(words,bigrams):
123
+ model = _get_ngram_model(bigrams)
124
+ starts = model.generate(100)[-4:]
125
+ starts = filter(lambda a: a not in PUNCT,starts)
126
+ generated = model.generate(words, starts)
127
+ out = ' '.join(generated).replace(' , ',', ').replace(' . ','. ')
128
+ return '%s%s&hellip;'%(out[0].upper(),out[1:])
129
+
130
+ if __name__ == '__main__':
131
+ app.run()
@@ -0,0 +1,109 @@
1
+ ; Copyright (c) Rich Hickey. All rights reserved.
2
+ ; The use and distribution terms for this software are covered by the
3
+ ; Eclipse Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php)
4
+ ; which can be found in the file epl-v10.html at the root of this distribution.
5
+ ; By using this software in any fashion, you are agreeing to be bound by
6
+ ; the terms of this license.
7
+ ; You must not remove this notice, or any other, from this software.
8
+
9
+ (ns clojure.browser.repl2
10
+ (:require [clojure.browser.net :as net]
11
+ [clojure.browser.event :as event]
12
+ [goog.json :as gjson]))
13
+
14
+ ;; Notes
15
+ ;; =====
16
+ ;;
17
+ ;; Using keywords for the service names does not work in Chrome or
18
+ ;; FireFox.
19
+ ;;
20
+ ;; --
21
+
22
+ (defn log-obj [obj]
23
+ (.log js/console obj))
24
+
25
+ ;; Outer/Parent Peer
26
+ ;; =================
27
+ ;;
28
+ ;; The code in this section will be run in the parent page which
29
+ ;; exists in the application's domain. This is where code will be
30
+ ;; evaluated.
31
+
32
+ (def parent-channel (atom nil))
33
+
34
+ (defn- ensure-string [val]
35
+ (if (string? val)
36
+ val
37
+ (str val)))
38
+
39
+ (defn evaluate-javascript
40
+ "Given a block of JavaScript, evaluate it and transmit the result to
41
+ the inner peer of the cross domain channel."
42
+ [block]
43
+ (log-obj (str "evaluating: " block))
44
+ (let [result (pr-str
45
+ (try {:status :success :value (ensure-string (js* "eval(~{block})"))}
46
+ (catch js/Error e {:status :exception :value (pr-str e)})))]
47
+ (log-obj (str "result: " result))
48
+ (net/transmit @parent-channel "return-value" result)))
49
+
50
+ (defn create-cross-domain-channel
51
+ "Create a cross domain channel with an iframe which can communicate
52
+ with the REPL server."
53
+ [url]
54
+ (let [chnl (doto (net/xpc-connection {:peer_uri (str url "/repl")})
55
+ (net/register-service "evaluate-javascript" evaluate-javascript)
56
+ (net/connect document.body
57
+ (fn [] (log-obj "Parent channel connected."))
58
+ (fn [iframe] (set! iframe.style.display "none"))))]
59
+ (reset! parent-channel chnl)))
60
+
61
+ (defn connect
62
+ "Connect to a ClojureScript REPL server located at the passed url."
63
+ [url]
64
+ (goog.events/listen js/window "load" #(create-cross-domain-channel url)))
65
+
66
+ ;; Inner peer
67
+ ;; =========
68
+ ;;
69
+ ;; The code in this section will be run in the child iframe and can
70
+ ;; communicate with REPL server.
71
+
72
+ (def state (atom {:connection nil :url nil}))
73
+
74
+ (def child-channel (atom nil))
75
+
76
+ (defn transmit-post [connection url data]
77
+ (net/transmit connection url "POST" data nil 0))
78
+
79
+ (defn start-repl-connection
80
+ "Start the REPL loop"
81
+ [url]
82
+ (let [connection (net/xhr-connection)]
83
+ (reset! state {:connection connection :url url})
84
+ (event/listen connection
85
+ :success
86
+ (fn [e]
87
+ (net/transmit @child-channel
88
+ "evaluate-javascript"
89
+ (.getResponseText e/currentTarget ()))))
90
+ ;; The server is expecting to see the string "ready" for the
91
+ ;; initial connection.
92
+ (transmit-post connection url "ready")))
93
+
94
+ (defn return-value [val]
95
+ (log-obj (str "sending: " val))
96
+ (transmit-post (:connection @state) (:url @state) val))
97
+
98
+ ;; I can't get this to work using the clojure.browser.net api.
99
+
100
+ (defn inner-peer-channel
101
+ "This function will be called from a script in the child iframe."
102
+ [repl-url]
103
+ (let [cfg (gjson/parse (.getParameterValue (goog.Uri. window.location.href) "xpc"))
104
+ chnl (doto (goog.net.xpc.CrossPageChannel. cfg)
105
+ (net/register-service "return-value" return-value)
106
+ (.connect #(log-obj "Child channel connected.")))]
107
+ (do (reset! child-channel chnl)
108
+ (js/setTimeout #(start-repl-connection repl-url) 500))))
109
+