charlock_holmes_bundle_icu 0.6.9.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,254 @@
1
+ #lang racket/base
2
+
3
+ (require (for-syntax syntax/parse racket/syntax racket)
4
+ ffi/unsafe racket/function racket/string
5
+ "start.rkt" "c.rkt")
6
+
7
+ (struct jtype (signature tag predicate ctype racket->java java->racket))
8
+ (struct jtype/object jtype (class))
9
+ (struct jtype/vector jtype/object (element))
10
+ (struct jvector (cpointer type length))
11
+ (struct jprocedure (args return proc))
12
+
13
+ (define ((single-compose f1 f2) e) (f1 (f2 e)))
14
+
15
+ (define (make-jtype obj racket->java java->racket)
16
+ (let ([composed-racket->java (single-compose (jtype-racket->java obj) racket->java)]
17
+ [composed-java->racket (single-compose java->racket (jtype-java->racket obj))])
18
+ ; due to limitation in racket's struct-copy
19
+ (cond
20
+ [(jtype/vector? obj)
21
+ (struct-copy jtype/vector obj
22
+ [racket->java #:parent jtype composed-racket->java]
23
+ [java->racket #:parent jtype composed-java->racket])]
24
+ [(jtype/object? obj)
25
+ (struct-copy jtype/object obj
26
+ [racket->java #:parent jtype composed-racket->java]
27
+ [java->racket #:parent jtype composed-java->racket])]
28
+ [else
29
+ (struct-copy jtype obj
30
+ [racket->java composed-racket->java]
31
+ [java->racket composed-java->racket])])))
32
+
33
+ (define (jtype->ctype obj)
34
+ (make-ctype (jtype-ctype obj) (jtype-racket->java obj) (jtype-java->racket obj)))
35
+
36
+ ; --- signature makers ---
37
+ (define (make-class-signature c) (string-append "L" c ";"))
38
+ (define (make-vector-signature s) (string-append "[" s))
39
+ (define (make-signature args return)
40
+ (let ([args-signature (string-append* (map jtype-signature args))]
41
+ [return-signature (jtype-signature return)])
42
+ (string-append "(" args-signature ")" return-signature)))
43
+
44
+ ; --- predicates for java types on racket ---
45
+ (require (only-in web-server/dispatch/extend make-coerce-safe?) srfi/26/cut)
46
+
47
+ (define jboolean? boolean?)
48
+ (define jbyte? byte?)
49
+ (define jchar? char?)
50
+ (define jshort? (make-coerce-safe? (cut < -32768 <> 32767)))
51
+ (define jint? (make-coerce-safe? (cut < -2147483648 <> 2147483647)))
52
+ (define jlong? (make-coerce-safe? (cut < -9223372036854775808 <> 9223372036854775807)))
53
+ (define jfloat? single-flonum?)
54
+ (define jdouble? flonum?)
55
+ (define jstring? string?)
56
+ (define ((make-jobject-predicate clss) o) (instance-of? o clss))
57
+ (define ((make-jlist-predicate element?) o) (andmap element? o))
58
+
59
+ ; --- java types ---
60
+ (define _jboolean (jtype "Z" 'boolean jboolean? __jboolean #f #f))
61
+ (define _jbyte (jtype "B" 'byte jbyte? __jbyte #f #f))
62
+ (define _jchar (jtype "C" 'char jchar? __jchar char->integer integer->char))
63
+ (define _jshort (jtype "S" 'short jshort? __jshort #f #f))
64
+ (define _jint (jtype "I" 'int jint? __jint #f #f))
65
+ (define _jlong (jtype "J" 'long jlong? __jlong #f #f))
66
+ (define _jfloat (jtype "F" 'float jfloat? __jfloat #f #f))
67
+ (define _jdouble (jtype "D" 'double jdouble? __jdouble #f #f))
68
+ (define _jvoid (jtype "V" 'void #f __jvoid #f #f))
69
+ ; hack for _jobject and _jlist so that they dual as a jtype and function
70
+ (define _jobject
71
+ ((λ ()
72
+ (struct _jobject jtype/object ()
73
+ #:property prop:procedure
74
+ (λ (self class-name [racket->java #f] [java->racket #f] [predicate #f])
75
+ (let ([class-id (find-class class-name)])
76
+ (struct-copy jtype/object self
77
+ [signature #:parent jtype (make-class-signature class-name)]
78
+ [predicate #:parent jtype (or predicate (make-jobject-predicate class-id))]
79
+ [racket->java #:parent jtype racket->java]
80
+ [java->racket #:parent jtype java->racket]
81
+ [class class-id]))))
82
+ (let ([class-id (find-class "Ljava/lang/Object;")])
83
+ (_jobject "Ljava/lang/Object;" 'object (make-jobject-predicate class-id)
84
+ __jobject #f #f class-id)))))
85
+ (define _jstring (_jobject "java/lang/String" new-string get-string jstring?))
86
+ (define _jlist
87
+ ((λ ()
88
+ (struct _jlist jtype/vector ()
89
+ #:property prop:procedure
90
+ (λ (self element)
91
+ (define-values (make-array array-ref array-set!) (tag->array-info (jtype-tag element)))
92
+ (when (jtype/object? element)
93
+ (let ([clss (jtype/object-class element)])
94
+ (set! make-array (λ (n) (new-object-array n clss #f)))))
95
+ (let* ([signature (make-vector-signature (jtype-signature element))]
96
+ [element-racket->java (or (jtype-racket->java element) identity)]
97
+ [element-java->racket (or (jtype-java->racket element) identity)]
98
+ [element? (or (jtype-predicate element) (λ (_) #t))])
99
+ (struct-copy jtype/vector self
100
+ [signature #:parent jtype signature]
101
+ [predicate #:parent jtype (make-jlist-predicate element?)]
102
+ [ctype #:parent jtype __jobject]
103
+ [racket->java #:parent jtype
104
+ (λ (c)
105
+ (let ([array (make-array (length c))])
106
+ (for ([e (in-list c)] [i (in-naturals)])
107
+ (array-set! array i (element-racket->java e)))
108
+ array))]
109
+ [java->racket #:parent jtype
110
+ (λ (c)
111
+ (for/list ([i (in-range (get-array-length c))])
112
+ (element-java->racket (array-ref c i))))]
113
+ [class #:parent jtype/object (find-class signature)]
114
+ [element element]))))
115
+ (let ([class-id (find-class "[Ljava/lang/Object;")]
116
+ [element-class-id (jtype/object-class _jobject)])
117
+ (_jlist "[Ljava/lang/Object;" 'object (make-jobject-predicate element-class-id) __jobject
118
+ (λ (c)
119
+ (let ([array (new-object-array (length c) element-class-id #f)])
120
+ (for ([e (in-list c)]
121
+ [i (in-naturals)])
122
+ (set-object-array-element array i e))
123
+ array))
124
+ (λ (c)
125
+ (for/list ([i (in-range (get-array-length c))])
126
+ (get-object-array-element c i)))
127
+ class-id
128
+ _jobject)))))
129
+ (define-syntax (_jmethod stx)
130
+ (define-syntax-class type #:literals (->)
131
+ (pattern (~and x (~not (~or (~literal ...) ->)))))
132
+ (syntax-parse stx #:literals (->)
133
+ [(_ arg:type ... (~optional (~seq farg:type (~literal ...))) (~optional (~seq -> return*)))
134
+ (with-syntax* ([(arg* ...) (generate-temporaries #'(arg ...))]
135
+ [(larg ... . marg) #`(arg* ... #,@(if (attribute farg) #'arg-rest #`()))]
136
+ [(aarg ...) #`(arg* ... #,@(if (attribute farg) #'(arg-rest) #`()))]
137
+ [return (if (attribute return*) #'return* #'_jvoid)])
138
+ #`(let* ([args (list arg ... #,@(if (attribute farg) #`((_jlist farg)) #`()))])
139
+ (jprocedure args return
140
+ (λ (type jnienv clss method func)
141
+ (case type
142
+ [(constructor) (λ (larg ... . marg) (func jnienv clss method aarg ...))]
143
+ [(static-method) (λ (larg ... . marg) (func jnienv clss method aarg ...))]
144
+ [(method) (λ (o larg ... . marg) (func jnienv o method aarg ...))]
145
+ [else (error '_jmethod "invalid type provided")])))))]))
146
+ ; dynamic and slower version of _jmethod
147
+ (define (_jprocedure args return #:repeat-last-arg? [repeat-last-arg? #f])
148
+ (define (nest-at lst i)
149
+ (if (null? lst) (list null)
150
+ (let loop ([lst lst] [i i])
151
+ (cond [(null? lst) null]
152
+ [(zero? i) (list lst)]
153
+ [else (cons (car lst) (loop (cdr lst) (sub1 i)))]))))
154
+ (jprocedure args return
155
+ (if repeat-last-arg?
156
+ (let ([repeat-position (sub1 (length args))])
157
+ (λ (type jnienv clss method func)
158
+ (case type
159
+ [(constructor) (λ larg (apply func jnienv clss method (nest-at larg repeat-position)))]
160
+ [(static-method) (λ larg (apply func jnienv clss method (nest-at larg repeat-position)))]
161
+ [(method) (λ (o . larg) (apply func jnienv o method (nest-at larg repeat-position)))])))
162
+ (λ (type jnienv clss method func)
163
+ (case type
164
+ [(constructor) (λ larg (apply func jnienv clss method larg))]
165
+ [(static-method) (λ larg (apply func jnienv clss method larg))]
166
+ [(method) (λ (o . larg) (apply func jnienv o method larg))]
167
+ [else (error '_jprocedure "invalid type provided")])))))
168
+ ; get-jmethod/get-jconstructor pass the following arguments (type jnienv class method func)
169
+ ; to a function created by _jmethod or _jprocedure
170
+ ; according to the type the function returns one of the following functions
171
+ ; | constructor (λ (args ...) ; doesn't need to take in an object and the class is static
172
+ ; | static-method (λ (args ...) ; same reasoning as above
173
+ ; | method (λ (object args ...)
174
+
175
+
176
+ ; --- interfacing with java methods ---
177
+ (define (get-jconstructor class-id type)
178
+ (let* ([args (jprocedure-args type)]
179
+ [return (jprocedure-return type)]
180
+ [proc (jprocedure-proc type)]
181
+ [signature (make-signature args return)]
182
+ [method-id (get-method-id class-id "<init>" signature)]
183
+ [ffi-func (get-jrffi-obj "new-object"
184
+ (_cprocedure (list* __jnienv __jclass __jmethodID (map jtype->ctype args))
185
+ __jobject))])
186
+ (proc 'constructor current-jnienv class-id method-id ffi-func)))
187
+
188
+ (define (get-jmethod class-id method-name type #:static? [static? #f])
189
+ (let* ([args (jprocedure-args type)]
190
+ [return (jprocedure-return type)]
191
+ [proc (jprocedure-proc type)]
192
+ [signature (make-signature args return)]
193
+ [method-id (get-method-id class-id method-name signature #:static? static?)]
194
+ [type (if static? 'static-method 'method)]
195
+ [ffi-func (get-jrffi-obj
196
+ (format "call-~a~a-method" (if static? "static-" "") (jtype-tag return))
197
+ (_cprocedure (append (list __jnienv (if static? __jclass __jobject)
198
+ __jmethodID) (map jtype->ctype args))
199
+ (jtype->ctype return)))])
200
+ (proc type current-jnienv class-id method-id ffi-func)))
201
+
202
+
203
+ ; --- interfacing with java fields ---
204
+ (define (get-jaccessor class-id field-name type #:static? [static? #f])
205
+ (let* ([signature (jtype-signature class-id field-name (jtype-signature type))]
206
+ [field-id (get-field-id class-id field-name signature #:static? static?)]
207
+ [ffi-func (get-jrffi-obj
208
+ (format "get-~a~a-field" (if static? "static-" "") (jtype-tag type))
209
+ (_cprocedure (list __jnienv (if static? __jclass __jobject) __jfieldID) type))])
210
+ (if static? (λ () (ffi-func current-jnienv class-id field-id))
211
+ (λ (obj) (ffi-func current-jnienv obj field-id)))))
212
+
213
+ (define (get-jmutator class-id field-name type #:static? [static? #f])
214
+ (let* ([signature (jtype-signature class-id field-name (jtype-signature type))]
215
+ [field-id (get-field-id class-id field-name signature #:static? static?)]
216
+ [ffi-func (get-jrffi-obj
217
+ (format "set-~a~a-field" (if static? "static-" "") (jtype-tag type))
218
+ (_cprocedure (list __jnienv (if static? __jclass __jobject) __jfieldID type) type))])
219
+ (if static? (λ (new-value) (ffi-func current-jnienv class-id field-id new-value))
220
+ (λ (obj new-value) (ffi-func current-jnienv obj field-id new-value)))))
221
+
222
+ (define (get-jparameter class-id field-name type #:static? [static? #f])
223
+ (let* ([accessor (get-jaccessor class-id field-name type #:static? static?)]
224
+ [mutator (get-jmutator class-id field-name type #:static? static?)])
225
+ (if static?
226
+ (case-lambda
227
+ [() (accessor)]
228
+ [(new-value) (mutator new-value)])
229
+ (case-lambda
230
+ [(obj) (accessor obj)]
231
+ [(obj new-value) (mutator obj new-value)]))))
232
+
233
+
234
+
235
+ (provide _jboolean _jbyte _jchar _jshort _jint _jlong _jfloat _jdouble _jvoid
236
+ _jobject _jstring _jlist)
237
+
238
+ (provide get-jconstructor get-jmethod get-jparameter get-jmutator get-jaccessor)
239
+
240
+ ;(provide instance-of? (rename-out [find-class find-class]) get-method-id get-field-id)
241
+
242
+
243
+ (provide (all-defined-out) : -> current-jnienv)
244
+
245
+
246
+
247
+
248
+
249
+
250
+
251
+
252
+
253
+
254
+
Binary file
@@ -0,0 +1,131 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+ #
4
+ # @author: starenka
5
+ # @email: 'moc]tod[liamg].T.E[0aknerats'[::-1]
6
+
7
+ import warnings, hashlib, simplejson, string
8
+ from os.path import dirname, abspath
9
+
10
+ from flask import Flask, render_template, request
11
+ from flaskext.sqlalchemy import SQLAlchemy
12
+ try:
13
+ from sqlalchemy.exceptions import IntegrityError
14
+ except ImportError:
15
+ from sqlalchemy.exc import IntegrityError
16
+
17
+ #Hey monkey patcher! NLTK's NgramModel is not serializable w/ pickle.HIGHEST_PROTOCOL (2)
18
+ from werkzeug.contrib import cache
19
+ cache.HIGHEST_PROTOCOL = 1
20
+ from werkzeug.contrib.cache import SimpleCache
21
+
22
+ from BeautifulSoup import BeautifulSoup
23
+ import nltk
24
+
25
+ PUNCT = list(unicode(string.punctuation))
26
+
27
+ app = Flask(__name__)
28
+ app.config.from_object('settings')
29
+ cache = SimpleCache()
30
+
31
+ app.config['SQLALCHEMY_DATABASE_URI'] = 'sqlite:///%s/db.sqlite3'%abspath(dirname(__file__))
32
+ db = SQLAlchemy(app)
33
+
34
+ class Sample(db.Model):
35
+ id = db.Column(db.Integer, primary_key=True)
36
+ url = db.Column(db.String(80), unique=True)
37
+ text = db.Column(db.String())
38
+ enabled = db.Column(db.Boolean())
39
+
40
+ def __unicode__(self):
41
+ str = unicode(BeautifulSoup(self.text,convertEntities=BeautifulSoup.HTML_ENTITIES))
42
+ return nltk.clean_html(str)
43
+
44
+ @classmethod
45
+ def get_all(self):
46
+ cached = cache.get('samples')
47
+ if cached is None:
48
+ cached = self.query.filter_by(enabled=True).all()
49
+ cache.set('samples', cached, timeout=app.config['CACHE_MINUTES'] * 60)
50
+ return cached
51
+
52
+ class Output(db.Model):
53
+ id = db.Column(db.Integer, primary_key=True)
54
+ hash = db.Column(db.String(128),unique=True)
55
+ text = db.Column(db.String())
56
+ params = db.Column(db.String(100))
57
+
58
+ def __init__(self,text,**params):
59
+ self.hash = hashlib.sha512(text.encode('utf8')).hexdigest()
60
+ self.text = text
61
+ self.params = simplejson.dumps(params)
62
+
63
+ @app.context_processor
64
+ def base_context():
65
+ return dict(settings=app.config,
66
+ hits = Output.query.count() + app.config['INIT_HITS']
67
+ )
68
+
69
+ @app.errorhandler(404)
70
+ def page_not_found(error):
71
+ return render_template('404.html',title=u"To tady nemáme!"), 404
72
+
73
+ @app.route('/faq')
74
+ def faq():
75
+ return render_template('faq.html',title=u"Často kladené dotazy",samples=Sample.get_all())
76
+
77
+ @app.route('/permalink/<hash>')
78
+ def permalink(hash):
79
+ one = Output.query.filter_by(hash=hash).first_or_404()
80
+ return render_template('generator.html', title=u"Henrykuj!",
81
+ text=one.text, hash=one.hash,
82
+ **simplejson.loads(one.params)
83
+ )
84
+
85
+ @app.route('/')
86
+ def index():
87
+ bigrams = request.args.get('bigrams',False)
88
+ try:
89
+ words = int(request.args.get('words',app.config['WORDS']))
90
+ if words > app.config['MAX_WORDS']:
91
+ words = app.config['MAX_WORDS']
92
+ except ValueError:
93
+ words = app.config['WORDS']
94
+
95
+ out = _generate(words,bigrams)
96
+ output = Output(out,words=words,bigrams=bool(bigrams))
97
+ try:
98
+ db.session.add(output)
99
+ db.session.commit()
100
+ except IntegrityError:
101
+ pass
102
+
103
+ return render_template('generator.html', title=u"Henrykuj!",
104
+ text=out, hash=output.hash,
105
+ words=words, bigrams=bigrams
106
+ )
107
+
108
+ def _get_ngram_model(bigrams):
109
+ #NLTK produces a LOT of warnings - don't mess with my error log
110
+ warnings.simplefilter("ignore")
111
+ cached = cache.get('ngram_model')
112
+ if cached is None:
113
+ samples = Sample.get_all()
114
+ if samples:
115
+ text = [unicode(s) for s in samples]
116
+ tokenizer = nltk.tokenize.WordPunctTokenizer()
117
+ tokenized = tokenizer.tokenize(' '.join(text))
118
+ cached = nltk.NgramModel(3-int(bool(bigrams)), tokenized)
119
+ cache.set('ngram_model', cached, timeout=app.config['CACHE_MINUTES'] * 60)
120
+ return cached
121
+
122
+ def _generate(words,bigrams):
123
+ model = _get_ngram_model(bigrams)
124
+ starts = model.generate(100)[-4:]
125
+ starts = filter(lambda a: a not in PUNCT,starts)
126
+ generated = model.generate(words, starts)
127
+ out = ' '.join(generated).replace(' , ',', ').replace(' . ','. ')
128
+ return '%s%s&hellip;'%(out[0].upper(),out[1:])
129
+
130
+ if __name__ == '__main__':
131
+ app.run()
@@ -0,0 +1,109 @@
1
+ ; Copyright (c) Rich Hickey. All rights reserved.
2
+ ; The use and distribution terms for this software are covered by the
3
+ ; Eclipse Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php)
4
+ ; which can be found in the file epl-v10.html at the root of this distribution.
5
+ ; By using this software in any fashion, you are agreeing to be bound by
6
+ ; the terms of this license.
7
+ ; You must not remove this notice, or any other, from this software.
8
+
9
+ (ns clojure.browser.repl2
10
+ (:require [clojure.browser.net :as net]
11
+ [clojure.browser.event :as event]
12
+ [goog.json :as gjson]))
13
+
14
+ ;; Notes
15
+ ;; =====
16
+ ;;
17
+ ;; Using keywords for the service names does not work in Chrome or
18
+ ;; FireFox.
19
+ ;;
20
+ ;; --
21
+
22
+ (defn log-obj [obj]
23
+ (.log js/console obj))
24
+
25
+ ;; Outer/Parent Peer
26
+ ;; =================
27
+ ;;
28
+ ;; The code in this section will be run in the parent page which
29
+ ;; exists in the application's domain. This is where code will be
30
+ ;; evaluated.
31
+
32
+ (def parent-channel (atom nil))
33
+
34
+ (defn- ensure-string [val]
35
+ (if (string? val)
36
+ val
37
+ (str val)))
38
+
39
+ (defn evaluate-javascript
40
+ "Given a block of JavaScript, evaluate it and transmit the result to
41
+ the inner peer of the cross domain channel."
42
+ [block]
43
+ (log-obj (str "evaluating: " block))
44
+ (let [result (pr-str
45
+ (try {:status :success :value (ensure-string (js* "eval(~{block})"))}
46
+ (catch js/Error e {:status :exception :value (pr-str e)})))]
47
+ (log-obj (str "result: " result))
48
+ (net/transmit @parent-channel "return-value" result)))
49
+
50
+ (defn create-cross-domain-channel
51
+ "Create a cross domain channel with an iframe which can communicate
52
+ with the REPL server."
53
+ [url]
54
+ (let [chnl (doto (net/xpc-connection {:peer_uri (str url "/repl")})
55
+ (net/register-service "evaluate-javascript" evaluate-javascript)
56
+ (net/connect document.body
57
+ (fn [] (log-obj "Parent channel connected."))
58
+ (fn [iframe] (set! iframe.style.display "none"))))]
59
+ (reset! parent-channel chnl)))
60
+
61
+ (defn connect
62
+ "Connect to a ClojureScript REPL server located at the passed url."
63
+ [url]
64
+ (goog.events/listen js/window "load" #(create-cross-domain-channel url)))
65
+
66
+ ;; Inner peer
67
+ ;; =========
68
+ ;;
69
+ ;; The code in this section will be run in the child iframe and can
70
+ ;; communicate with REPL server.
71
+
72
+ (def state (atom {:connection nil :url nil}))
73
+
74
+ (def child-channel (atom nil))
75
+
76
+ (defn transmit-post [connection url data]
77
+ (net/transmit connection url "POST" data nil 0))
78
+
79
+ (defn start-repl-connection
80
+ "Start the REPL loop"
81
+ [url]
82
+ (let [connection (net/xhr-connection)]
83
+ (reset! state {:connection connection :url url})
84
+ (event/listen connection
85
+ :success
86
+ (fn [e]
87
+ (net/transmit @child-channel
88
+ "evaluate-javascript"
89
+ (.getResponseText e/currentTarget ()))))
90
+ ;; The server is expecting to see the string "ready" for the
91
+ ;; initial connection.
92
+ (transmit-post connection url "ready")))
93
+
94
+ (defn return-value [val]
95
+ (log-obj (str "sending: " val))
96
+ (transmit-post (:connection @state) (:url @state) val))
97
+
98
+ ;; I can't get this to work using the clojure.browser.net api.
99
+
100
+ (defn inner-peer-channel
101
+ "This function will be called from a script in the child iframe."
102
+ [repl-url]
103
+ (let [cfg (gjson/parse (.getParameterValue (goog.Uri. window.location.href) "xpc"))
104
+ chnl (doto (goog.net.xpc.CrossPageChannel. cfg)
105
+ (net/register-service "return-value" return-value)
106
+ (.connect #(log-obj "Child channel connected.")))]
107
+ (do (reset! child-channel chnl)
108
+ (js/setTimeout #(start-repl-connection repl-url) 500))))
109
+