josh-gmail-backup 0.104

Sign up to get free protection for your applications and to get access to all the features.
data/svc/retrans.py ADDED
@@ -0,0 +1,225 @@
1
+ # Copyright (C) 2008 Jan Svec and Filip Jurcicek
2
+ #
3
+ # YOU USE THIS TOOL AT YOUR OWN RISK!
4
+ #
5
+ # email: info@gmail-backup.com
6
+ #
7
+ #
8
+ # Disclaimer of Warranty
9
+ # ----------------------
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, licensor provides
12
+ # this tool (and each contributor provides its contributions) on an "AS IS"
13
+ # BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
14
+ # implied, including, without limitation, any warranties or conditions of
15
+ # TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR
16
+ # PURPOSE. You are solely responsible for determining the appropriateness of
17
+ # using this work and assume any risks associated with your exercise of
18
+ # permissions under this license.
19
+
20
+ from svc.scripting import *
21
+ import sys
22
+ import subprocess
23
+ import re
24
+
25
class Preprocessor(PythonEgg):
    """Run input through the external C preprocessor (``cpp``).

    The command line is ``cpp <cppOptions...> -P``; ``-P`` asks cpp not to
    emit linemarkers, so the output contains only the preprocessed text.
    """

    def __init__(self, cppOptions=None):
        """Remember extra command-line options for ``cpp``.

        :param cppOptions: list of additional arguments inserted between
            ``cpp`` and ``-P``; ``None`` means no extra arguments.
        """
        super(Preprocessor, self).__init__()
        if cppOptions is None:
            cppOptions = []
        self.cppOptions = cppOptions

    def process(self, fr):
        """Preprocess the open file `fr` and return the result.

        :param fr: open file object handed to ``cpp`` as its standard input.
        :returns: an in-memory, file-like object (iterable line by line,
            supports ``read()`` and ``close()``) holding the raw bytes that
            ``cpp`` wrote to its standard output.
        :raises subprocess.CalledProcessError: if ``cpp`` exits with a
            non-zero status.
        :raises OSError: if ``cpp`` cannot be started.
        """
        # Imported here so the block does not depend on a new module-level
        # import; cStringIO is the fallback for interpreters without `io`.
        try:
            from io import BytesIO
        except ImportError:
            from cStringIO import StringIO as BytesIO

        cmd = ['cpp'] + list(self.cppOptions) + ['-P']
        p = subprocess.Popen(cmd, stdin=fr, stdout=subprocess.PIPE)
        # communicate() drains the pipe *and* waits for the child, so the
        # process is always reaped and its exit status becomes available.
        out, _ = p.communicate()
        if p.returncode != 0:
            # A failed preprocess would otherwise yield silently truncated
            # or empty output.
            raise subprocess.CalledProcessError(p.returncode, cmd)
        return BytesIO(out)
35
+
36
class RuleSet(PythonEgg):
    """A group of regular-expression rewrite rules applied to a fixed point.

    Each rule is an indexable pair ``(pattern, replacement)``.  The patterns
    are compiled once with ``re.UNICODE`` when the set is constructed.
    """

    def __init__(self, rules):
        """Compile `rules` and store them on ``self.rules``."""
        super(RuleSet, self).__init__()
        self.rules = self.compileRules(rules)

    def compileRules(self, rules):
        """Return a list of ``(compiled_pattern, replacement)`` tuples."""
        compiled = []
        for rule in rules:
            regex = re.compile(rule[0], re.UNICODE)
            compiled.append((regex, rule[1]))
        return compiled

    def applyn(self, text):
        """Apply all rules repeatedly until a full pass leaves `text` unchanged.

        Returns ``(new_text, n)`` where ``n`` is the number of substitutions
        summed over every pass.  There is no iteration limit: a rule set
        whose output keeps changing never terminates.
        """
        substitutions = 0
        changed = True
        while changed:
            before = text
            for regex, replacement in self.rules:
                text, hits = regex.subn(replacement, text)
                substitutions += hits
            # Stop as soon as one complete pass was a no-op.
            changed = text != before
        return text, substitutions

    def apply(self, text):
        """Like :meth:`applyn`, but return only the rewritten text."""
        rewritten, _ = self.applyn(text)
        return rewritten
57
+
58
+
59
class RuleList(PythonEgg, list):
    """A list of rule sets that are applied to a text one after another.

    Rule text format, as implemented by :meth:`parseRuleLine`:

    * fields are separated by ``:``; the last field is the replacement and
      every preceding field is a pattern mapped to that replacement;
    * ``\\:`` is a literal colon inside a field;
    * fields are stripped of surrounding whitespace, after which the
      escapes in :attr:`ESCAPES` are substituted;
    * a rule whose pattern and replacement are both empty (e.g. a line
      containing just ``:``) separates one rule set from the next.
    """

    # Escape sequences substituted in every field after stripping:
    # backslash-underscore gives a space, backslash-t gives a tab.
    ESCAPES = {
        r'\_': ' ',
        r'\t': '\t',
    }

    def applyn(self, text):
        """Apply each contained rule set in order.

        Returns ``(new_text, total)`` where ``total`` sums the counts
        reported by every rule set's ``applyn``.
        """
        total_count = 0
        for ruleSet in self:
            text, count = ruleSet.applyn(text)
            total_count += count
        return text, total_count

    def apply(self, text):
        """Like :meth:`applyn`, but return only the rewritten text."""
        return self.applyn(text)[0]

    @classmethod
    def parseRuleLine(cls, line):
        """Parse one line into a list of ``(pattern, replacement)`` pairs.

        A line without an unescaped ``:`` has a single field and therefore
        yields an empty list.
        """
        parts = cls._joinEscapedParts(line.split(':'), ':')
        parts = [cls._substituteEscapes(part.strip()) for part in parts]

        last = parts[-1]
        return [(part, last) for part in parts[:-1]]

    @classmethod
    def _joinEscapedParts(cls, parts, chr):
        """Re-join fields that were split on an escaped separator.

        A field ending in a backslash is merged with its successor, the
        backslash being replaced by `chr`.  A backslash at the very end of
        the last field has nothing to join with and is left untouched
        (previously this raised ``IndexError``).
        """
        parts = list(parts)
        i = 0
        # Stop one short of the end: the final field has no successor.
        while i < len(parts) - 1:
            if parts[i].endswith('\\'):
                # Do not advance: the merged field may itself end in an
                # escape and need joining with the next one.
                parts[i:i+2] = [parts[i][:-1] + chr + parts[i+1]]
            else:
                i += 1
        return parts

    @classmethod
    def _substituteEscapes(cls, s):
        """Replace every escape sequence listed in :attr:`ESCAPES` in `s`."""
        for old, new in cls.ESCAPES.items():
            s = s.replace(old, new)
        return s

    @classmethod
    def _cleanRules(cls, rules):
        """Turn empty rules into ``None`` separators.

        A rule with both an empty pattern and an empty replacement becomes
        ``None``; consecutive separators are collapsed into one.
        """
        ret = []
        for i1, i2 in rules:
            if not i1 and not i2:
                if not ret or ret[-1] is not None:
                    ret.append(None)
            else:
                ret.append((i1, i2))
        return ret

    @classmethod
    def createFromString(cls, s, ruleClass=RuleSet):
        """Build a rule list from the rule text `s`, one rule line per line."""
        rules = []
        for line in s.splitlines():
            rules.extend(cls.parseRuleLine(line))
        return cls._createFromRules(rules, ruleClass)

    @classmethod
    def _createFromRules(cls, rules, ruleClass=RuleSet):
        """Group `rules` into `ruleClass` instances, split at separators.

        Every separator closes the group collected so far (even when that
        group is empty, e.g. for a leading separator); any rules left after
        the last separator form a final group.
        """
        rules = cls._cleanRules(rules)

        ret = cls()
        last_stop = 0
        for i, item in enumerate(rules):
            if item is None:
                ret.append(ruleClass(rules[last_stop:i]))
                last_stop = i + 1
        rest = rules[last_stop:]
        if rest:
            ret.append(ruleClass(rest))
        return ret

    @classmethod
    def createFromFiles(cls, fns, encoding='utf-8', ruleClass=RuleSet):
        """Build a rule list from the rule files named in `fns`.

        Each file is passed through :class:`Preprocessor` first; every
        output line is decoded with `encoding` and parsed as a rule line.
        Rules from all files are concatenated before grouping.
        """
        cpp = Preprocessor()

        rules = []
        for fn in fns:
            fr = open(fn, 'r')
            try:
                preprocessed = cpp.process(fr)
                try:
                    for line in preprocessed:
                        line = unicode(line, encoding)
                        rules.extend(cls.parseRuleLine(line))
                finally:
                    # Release the preprocessor's output stream as well as
                    # the input file.
                    preprocessed.close()
            finally:
                fr.close()

        return cls._createFromRules(rules, ruleClass)
156
+
157
+
158
class ReTrans(Script):
    """Command-line script that rewrites text using regular-expression rules.

    The rules are loaded from the positional `files` arguments.  Without
    ``--batch`` a single input is rewritten into a single output (``-``
    selects standard input / standard output); with ``--batch`` `input` and
    `output` are directories and every entry of the input directory is
    rewritten into a file of the same name in the output directory.
    """

    # Option declarations; the type markers are presumably provided by
    # svc.scripting -- confirm there for their exact meaning.
    options = {
        'input': String,
        'output': String,
        'encoding': String,
        'batch': Flag,
        'files': (Required, Multiple, String),
    }

    shortOpts = {
        'i': 'input',
        'o': 'output',
        'b': 'batch',
        'e': 'encoding',
    }

    posOpts = ['files', Ellipsis]

    def process(self, rules, fr, fw, encoding):
        """Read all of `fr`, apply `rules`, and write the result to `fw`.

        The input is decoded with `encoding` before the rules are applied
        and encoded with the same `encoding` when written.
        """
        text = unicode(fr.read(), encoding)
        text = rules.apply(text)
        fw.write(text.encode(encoding))

    def _transFile(self, rules, src, dst, encoding):
        """Rewrite `src` into `dst`, where ``'-'`` selects stdin / stdout.

        Files opened here are closed even if processing raises.  The
        standard streams are not closed; stdout is flushed instead.
        """
        fr = fw = None
        try:
            # Open the input first so that a missing input file does not
            # truncate the output file.
            if src == '-':
                fr = sys.stdin
            else:
                fr = open(src, 'r')

            if dst == '-':
                fw = sys.stdout
            else:
                fw = open(dst, 'w')

            self.process(rules, fr, fw, encoding)
        finally:
            if fw is not None:
                if fw is sys.stdout:
                    fw.flush()
                else:
                    fw.close()
            if fr is not None and fr is not sys.stdin:
                fr.close()

    def main(self, files, input='-', output='-', encoding='utf-8', batch=False):
        """Script entry point.

        :param files: rule files, loaded with ``RuleList.createFromFiles``.
        :param input: input file name, ``-`` for stdin, or (batch mode) the
            input directory.
        :param output: output file name, ``-`` for stdout, or (batch mode)
            the output directory.
        :param encoding: encoding used for the rule files and the data.
        :param batch: process a whole directory instead of a single file.
        :raises ValueError: in batch mode, if `input` or `output` is not an
            existing directory.
        """
        # The module has no explicit top-level `import os`; import it here
        # so batch mode does not rely on a star import providing the name.
        import os

        rules = RuleList.createFromFiles(files, encoding)

        if not batch:
            self._transFile(rules, input, output, encoding)
            return

        if not os.path.isdir(input):
            raise ValueError("Input directory %s doesn't exist" % input)

        if not os.path.isdir(output):
            raise ValueError("Output directory %s doesn't exist" % output)

        for fn in os.listdir(input):
            # Joined paths always carry a directory component, so they can
            # never be mistaken for the '-' stdin/stdout marker.
            fni = os.path.join(input, fn)
            fno = os.path.join(output, fn)

            self.logger.info("Processing file %s into %s", fni, fno)

            self._transFile(rules, fni, fno, encoding)
219
+
220
+
221
+
222
if __name__ == '__main__':
    # Executed as a script: build the ReTrans script object and run it.
    ReTrans().run()
225
+