bayes_naive_jdp 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,18 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ *.swp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in bayes_naive_jdp.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Jason
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,29 @@
1
+ # BayesNaiveJdp
2
+
3
+ TODO: Write a gem description
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'bayes_naive_jdp'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install bayes_naive_jdp
18
+
19
+ ## Usage
20
+
21
+ TODO: Write usage instructions here
22
+
23
+ ## Contributing
24
+
25
+ 1. Fork it
26
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
27
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
28
+ 4. Push to the branch (`git push origin my-new-feature`)
29
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,71 @@
1
require "bundler/gem_tasks"
require "rubygems"
require "rake"
require "date"

# ------------------------------------------------------------------------------
# Helpers for discovering package metadata from the project layout.
# ------------------------------------------------------------------------------

# Gem name, derived from the first *.gemspec file in the project root.
def name
  @name ||= Dir["*.gemspec"].first.split(".").first
end

# Version string parsed from the VERSION constant in lib/<name>/version.rb.
def version
  line = File.read("lib/#{name}/version.rb")[/^\s*VERSION\s*=\s*.*/]
  line.match(/.*VERSION\s*=\s*['"](.*)['"]/)[1]
end

# Path of the gemspec file.
def gemspec_file
  "#{name}.gemspec"
end

# Path where the built .gem package is placed.
def gem_file
  "pkg/#{name}-#{version}.gem"
end

# ------------------------------------------------------------------------------
# Tasks
# ------------------------------------------------------------------------------

desc "Run the tests"
task :default => :test

require "rake/testtask"
Rake::TestTask.new(:test) do |t|
  t.verbose = true
end

desc "Build gem locally"
task :build => :validate do
  system "gem build #{name}.gemspec"
  FileUtils.mkdir_p "pkg"
  FileUtils.mv "#{name}-#{version}.gem", gem_file
end

desc "Publish the gem to rubygems.org"
task :publish => :build do
  system "gem push #{gem_file}"
end

desc "Install gem locally"
task :install => :build do
  system "gem install #{gem_file}"
end

desc "Validate #{gemspec_file}"
task :validate do
  # Enforce the conventional gem layout: lib/ holds only <name>.rb and <name>/.
  libfiles = Dir["lib/*"] - ["lib/#{name}.rb", "lib/#{name}"]
  unless libfiles.empty?
    puts "Directory `lib` should only contain a `#{name}.rb` file and `#{name}` dir."
    exit!
  end
  unless Dir["VERSION*"].empty?
    puts "A `VERSION` file at root level violates Gem best practices."
    exit!
  end
end
@@ -0,0 +1,29 @@
1
# coding: utf-8
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'bayes_naive_jdp/version'

# Packaging metadata for the bayes_naive_jdp gem.
Gem::Specification.new do |s|
  s.name = "bayes_naive_jdp"
  s.version = BayesNaiveJdp::VERSION
  s.authors = ["Jason Pollentier"]
  s.email = ["pollentj@gmail.com"]
  # Multi-line %q literal shown on rubygems.org; preserved verbatim.
  s.description = %q{A very simple naive Bayesian classifier.
I'm just using it as practice as I learn how to package ruby code.
The algorithm used here is not original, but an adaptation from Burak Kanber's
Machine Learning in Javascript series.

http://readable.cc/feed/view/34236/burak-kanber-s-blog
}
  s.summary = %q{A very simple naive Bayesian classifier.}
  s.homepage = ""
  s.license = "MIT"

  # Everything tracked by git ships in the gem; bin/ entries become executables.
  s.files = `git ls-files`.split($/)
  s.executables = s.files.grep(%r{^bin/}) { |f| File.basename(f) }
  s.test_files = s.files.grep(%r{^(test|spec|features)/})
  s.require_paths = ["lib"]

  s.add_development_dependency "bundler", "~> 1.3"
  s.add_development_dependency "rake"
end
@@ -0,0 +1,3 @@
1
module BayesNaiveJdp
  # Gem version, following MAJOR.MINOR.PATCH semantic versioning.
  VERSION = "0.0.1"
end
@@ -0,0 +1,82 @@
1
+ require "bayes_naive_jdp/version"
2
+
3
module BayesNaiveJdp
  # A very small naive Bayes text classifier.
  #
  # Usage:
  #   c = Classifier.new
  #   c.train("some document text", :label)
  #   c.classify("new document")
  #   # => { :winner => { :classification => :label, :confidence => 0.87 },
  #   #      :all_scores => { :label => 0.87, ... } }
  #
  # Adapted from Burak Kanber's "Machine Learning in Javascript" series.
  class Classifier

    def initialize
      @custom_tokenizer = nil
      clear
    end

    # Adds one labelled document to the training set.
    #
    # document - String to learn from (run through the tokenizer).
    # label    - any hashable object naming the document's class.
    def train(document, label)
      @training_set_size += 1
      label_seen(label)
      tokenize(document).each do |token|
        token_seen(token, label)
      end
    end

    # Scores +document+ against every label seen during training.
    #
    # Returns a Hash:
    #   :winner     - { :classification => best label, :confidence => score }
    #                 (nil when classify is called before any training)
    #   :all_scores - { label => score in (0, 1) } for every trained label
    def classify(document)
      tokens = tokenize(document)
      scores = {}

      @labels.each do |label, label_frequency|
        # Work in log space to avoid floating-point underflow when many
        # token probabilities are multiplied together.
        log_sum = 0
        tokens.each do |token|
          counts = @tokens[token]
          next unless counts # token never seen in training; contributes nothing

          token_freq = counts.values.inject(0) { |sum, count| sum + count }
          token_prob = counts[label].to_f / label_frequency

          # P(token | not label). When every training document carries this
          # label there are no "other" documents; treat the inverse evidence
          # as 0 instead of dividing by zero (the original produced NaN
          # scores for single-label training sets).
          other_docs = @training_set_size - label_frequency
          token_inverse_prob =
            other_docs > 0 ? (token_freq - counts[label]).to_f / other_docs : 0.0

          wordicity = token_prob / (token_prob + token_inverse_prob)

          # Pull harder toward neutral (0.5) for rarely seen tokens by
          # averaging in adjustment_weight fake 0.5-scored observations.
          adjustment_weight = 1
          wordicity = (adjustment_weight * 0.5 + token_freq * wordicity) /
                      (adjustment_weight + token_freq)

          # Clamp away from both extremes so neither Math.log argument can be
          # zero. (The original set wordicity to 0.00 when it equalled 1 —
          # exactly the log-breaking value this guard is meant to prevent.)
          wordicity = 0.01 if wordicity <= 0
          wordicity = 0.99 if wordicity >= 1
          log_sum += Math.log(1 - wordicity) - Math.log(wordicity)
        end
        scores[label] = 1 / (1 + Math.exp(log_sum))
      end

      winner = scores.max_by { |_label, score| score }
      {
        :winner => winner && { :classification => winner[0], :confidence => winner[1] },
        :all_scores => scores
      }
    end

    # Supplies a custom tokenizer as a block: String => [String].
    # Example: classifier.tokenizer { |doc| doc.split(',') }
    def tokenizer(&block)
      @custom_tokenizer = block
    end

    protected

    # Resets all learned state.
    def clear
      @labels = Hash.new(0) # label => number of training documents seen
      @tokens = {}          # token => { label => occurrence count }
      @training_set_size = 0
    end

    # Splits +document+ into the token list used by train and classify.
    def tokenize(document)
      # NOTE: the original additionally required @custom_tokenizer.lambda?,
      # but a block captured via &block is a non-lambda Proc, so a tokenizer
      # supplied "as a block" (as documented) was silently ignored. Any
      # stored callable is now honoured.
      return @custom_tokenizer.call(document) if @custom_tokenizer

      # Default tokenizer: lowercase, strip punctuation, split on spaces,
      # and keep only unique occurrences.
      document.downcase.gsub(/[^a-zA-Z 0-9]/, '').split(' ').uniq
    end

    # Records one more training document for +label+.
    def label_seen(label)
      @labels[label] += 1
    end

    # Records one occurrence of +token+ in a document labelled +label+.
    # Per-token hashes get a default count of 0 via an explicit Hash.new(0)
    # (replaces the shared-default Hash.new({}) footgun).
    def token_seen(token, label)
      (@tokens[token] ||= Hash.new(0))[label] += 1
    end
  end
end
@@ -0,0 +1,26 @@
1
+ Ten years ago, America invaded Iraq; somehow, our political class decided that we should respond to a terrorist attack by making war on a regime that, however vile, had nothing to do with that attack.
2
+ Some voices warned that we were making a terrible mistake — that the case for war was weak and possibly fraudulent, and that far from yielding the promised easy victory, the venture was all too likely to end in costly grief. And those warnings were, of course, right.
3
+
4
+ There were, it turned out, no weapons of mass destruction; it was obvious in retrospect that the Bush administration deliberately misled the nation into war. And the war — having cost thousands of American lives and scores of thousands of Iraqi lives, having imposed financial costs vastly higher than the war’s boosters predicted — left America weaker, not stronger, and ended up creating an Iraqi regime that is closer to Tehran than it is to Washington.
5
+
6
+ So did our political elite and our news media learn from this experience? It sure doesn’t look like it.
7
+
8
+ The really striking thing, during the run-up to the war, was the illusion of consensus. To this day, pundits who got it wrong excuse themselves on the grounds that “everyone” thought that there was a solid case for war. Of course, they acknowledge, there were war opponents — but they were out of the mainstream.
9
+
10
+ The trouble with this argument is that it was and is circular: support for the war became part of the definition of what it meant to hold a mainstream opinion. Anyone who dissented, no matter how qualified, was ipso facto labeled as unworthy of consideration. This was true in political circles; it was equally true of much of the press, which effectively took sides and joined the war party.
11
+
12
+ CNN’s Howard Kurtz, who was at The Washington Post at the time, recently wrote about how this process worked, how skeptical reporting, no matter how solid, was discouraged and rejected. “Pieces questioning the evidence or rationale for war,” he wrote, “were frequently buried, minimized or spiked.”
13
+
14
+ Closely associated with this taking of sides was an exaggerated and inappropriate reverence for authority. Only people in positions of power were considered worthy of respect. Mr. Kurtz tells us, for example, that The Post killed a piece on war doubts by its own senior defense reporter on the grounds that it relied on retired military officials and outside experts — “in other words, those with sufficient independence to question the rationale for war.”
15
+
16
+ All in all, it was an object lesson in the dangers of groupthink, a demonstration of how important it is to listen to skeptical voices and separate reporting from advocacy. But as I said, it’s a lesson that doesn’t seem to have been learned. Consider, as evidence, the deficit obsession that has dominated our political scene for the past three years.
17
+
18
+ Now, I don’t want to push the analogy too far. Bad economic policy isn’t the moral equivalent of a war fought on false pretenses, and while the predictions of deficit scolds have been wrong time and again, there hasn’t been any development either as decisive or as shocking as the complete failure to find weapons of mass destruction. Best of all, these days dissenters don’t operate in the atmosphere of menace, the sense that raising doubts could have devastating personal and career consequences, that was so pervasive in 2002 and 2003. (Remember the hate campaign against the Dixie Chicks?)
19
+
20
+ But now as then we have the illusion of consensus, an illusion based on a process in which anyone questioning the preferred narrative is immediately marginalized, no matter how strong his or her credentials. And now as then the press often seems to have taken sides. It has been especially striking how often questionable assertions are reported as fact. How many times, for example, have you seen news articles simply asserting that the United States has a “debt crisis,” even though many economists would argue that it faces no such thing?
21
+
22
+ In fact, in some ways the line between news and opinion has been even more blurred on fiscal issues than it was in the march to war. As The Post’s Ezra Klein noted last month, it seems that “the rules of reportorial neutrality don’t apply when it comes to the deficit.”
23
+
24
+ What we should have learned from the Iraq debacle was that you should always be skeptical and that you should never rely on supposed authority. If you hear that “everyone” supports a policy, whether it’s a war of choice or fiscal austerity, you should ask whether “everyone” has been defined to exclude anyone expressing a different opinion. And policy arguments should be evaluated on the merits, not by who expresses them; remember when Colin Powell assured us about those Iraqi W.M.D.’s?
25
+
26
+ Unfortunately, as I said, we don’t seem to have learned those lessons. Will we ever?
@@ -0,0 +1,28 @@
1
+ It has been a big week for budget documents. In fact, members of Congress have presented not one but two full-fledged, serious proposals for spending and taxes over the next decade.
2
+ Before I get to that, however, let me talk briefly about the third proposal presented this week — the one that isn’t serious, that’s essentially a cruel joke.
3
+
4
+ Way back in 2010, when everybody in Washington seemed determined to anoint Representative Paul Ryan as the ultimate Serious, Honest Conservative, I pronounced him a flimflam man. Even then, his proposals were obviously fraudulent: huge cuts in aid to the poor, but even bigger tax cuts for the rich, with all the assertions of fiscal responsibility resting on claims that he would raise trillions of dollars by closing tax loopholes (which he refused to specify) and cutting discretionary spending (in ways he refused to specify).
5
+
6
+ Since then, his budgets have gotten even flimflammier. For example, at this point, Mr. Ryan is claiming that he can slash the top tax rate from 39.6 percent to 25 percent, yet somehow raise 19.1 percent of G.D.P. in revenues — a number we haven’t come close to seeing since the dot-com bubble burst a dozen years ago.
7
+
8
+ The good news is that Mr. Ryan’s thoroughly unconvincing policy-wonk act seems, finally, to have worn out its welcome. In 2011, his budget was initially treated with worshipful respect, which faded only slightly as critics pointed out the document’s many absurdities. This time around, quite a few pundits and reporters have greeted his release with the derision it deserves.
9
+
10
+ And, with that, let’s turn to the serious proposals.
11
+
12
+ Unless you’re a very careful news reader, you’ve probably heard about only one of these proposals, the one released by Senate Democrats. And let’s be clear: By comparison with the Ryan plan, and for that matter with a lot of what passes for wisdom in our nation’s capital, this is a very reasonable plan indeed.
13
+
14
+ As many observers have pointed out, the Senate Democratic plan is conservative with a small “c”: It avoids any drastic policy changes. In particular, it steers away from draconian austerity, which is simply not needed given ultralow U.S. borrowing costs and relatively benign medium-term fiscal projections.
15
+
16
+ True, the Senate plan calls for further deficit reduction, through a mix of modest tax increases and spending cuts. (Incidentally, the tax increases still fall well short of those called for in the Bowles-Simpson plan, which Washington, for some reason, treats as something close to holy scripture.) But it avoids large short-run spending cuts, which would hobble our recovery at a time when unemployment is still disastrously high, and it even includes a modest amount of stimulus spending.
17
+
18
+ So we could definitely do worse than the Senate Democratic plan, and we probably will. It is, however, an extremely cautious proposal, one that doesn’t follow through on its own analysis. After all, if sharp spending cuts are a bad thing in a depressed economy — which they are — then the plan really should be calling for substantial though temporary spending increases. It doesn’t.
19
+
20
+ But there’s a plan that does: the proposal from the Congressional Progressive Caucus, titled “Back to Work,” which calls for substantial new spending now, temporarily widening the deficit, offset by major deficit reduction later in the next decade, largely though not entirely through higher taxes on the wealthy, corporations and pollution.
21
+
22
+ I’ve seen some people describe the caucus proposal as a “Ryan plan of the left,” but that’s unfair. There are no Ryan-style magic asterisks, trillion-dollar savings that are assumed to come from unspecified sources; this is an honest proposal. And “Back to Work” rests on solid macroeconomic analysis, not the fantasy “expansionary austerity” economics — the claim that slashing spending in a depressed economy somehow promotes job growth rather than deepening the depression — that Mr. Ryan continues to espouse despite the doctrine’s total failure in Europe.
23
+
24
+ No, the only thing the progressive caucus and Mr. Ryan share is audacity. And it’s refreshing to see someone break with the usual Washington notion that political “courage” means proposing that we hurt the poor while sparing the rich. No doubt the caucus plan is too audacious to have any chance of becoming law; but the same can be said of the Ryan plan.
25
+
26
+ So where is this all going? Realistically, we aren’t likely to get a Grand Bargain any time soon. Nonetheless, my sense is that there is some real movement here, and it’s in a direction conservatives won’t like.
27
+
28
+ As I said, Mr. Ryan’s efforts are finally starting to get the derision they deserve, while progressives seem, at long last, to be finding their voice. Little by little, Washington’s fog of fiscal flimflam seems to be lifting.
@@ -0,0 +1,23 @@
1
+ For three years and more, policy debate in Washington has been dominated by warnings about the dangers of budget deficits. A few lonely economists have tried from the beginning to point out that this fixation is all wrong, that deficit spending is actually appropriate in a depressed economy. But even though the deficit scolds have been wrong about everything so far — where are the soaring interest rates we were promised? — protests that we are having the wrong conversation have consistently fallen on deaf ears.
2
+
3
+ What’s really remarkable at this point, however, is the persistence of the deficit fixation in the face of rapidly changing facts. People still talk as if the deficit were exploding, as if the United States budget were on an unsustainable path; in fact, the deficit is falling more rapidly than it has for generations, it is already down to sustainable levels, and it is too small given the state of the economy.
4
+
5
+ Start with the raw numbers. America’s budget deficit soared after the 2008 financial crisis and the recession that went with it, as revenue plunged and spending on unemployment benefits and other safety-net programs rose. And this rise in the deficit was a good thing! Federal spending helped sustain the economy at a time when the private sector was in panicked retreat; arguably, the stabilizing role of a large government was the main reason the Great Recession didn’t turn into a full replay of the Great Depression.
6
+
7
+ But after peaking in 2009 at $1.4 trillion, the deficit began coming down. The Congressional Budget Office expects the deficit for fiscal 2013 (which began in October and is almost half over) to be $845 billion. That may still sound like a big number, but given the state of the economy it really isn’t.
8
+
9
+ Bear in mind that the budget doesn’t have to be balanced to put us on a fiscally sustainable path; all we need is a deficit small enough that debt grows more slowly than the economy. To take the classic example, America never did pay off the debt from World War II — in fact, our debt doubled in the 30 years that followed the war. But debt as a percentage of G.D.P. fell by three-quarters over the same period.
10
+
11
+ Right now, a sustainable deficit would be around $460 billion. The actual deficit is bigger than that. But according to new estimates by the budget office, half of our current deficit reflects the effects of a still-depressed economy. The “cyclically adjusted” deficit — what the deficit would be if we were near full employment — is only about $423 billion, which puts it in the sustainable range; next year the budget office expects that number to fall to just $172 billion. And that’s why budget office projections show the nation’s debt position more or less stable over the next decade.
12
+
13
+ So we do not, repeat do not, face any kind of deficit crisis either now or for years to come.
14
+
15
+ There are, of course, longer-term fiscal issues: rising health costs and an aging population will put the budget under growing pressure over the course of the 2020s. But I have yet to see any coherent explanation of why these longer-run concerns should determine budget policy right now. And as I said, given the needs of the economy, the deficit is currently too small.
16
+
17
+ Put it this way: Smart fiscal policy involves having the government spend when the private sector won’t, supporting the economy when it is weak and reducing debt only when it is strong. Yet the cyclically adjusted deficit as a share of G.D.P. is currently about what it was in 2006, at the height of the housing boom — and it is headed down.
18
+
19
+ Yes, we’ll want to reduce deficits once the economy recovers, and there are gratifying signs that a solid recovery is finally under way. But unemployment, especially long-term unemployment, is still unacceptably high. “The boom, not the slump, is the time for austerity,” John Maynard Keynes declared many years ago. He was right — all you have to do is look at Europe to see the disastrous effects of austerity on weak economies. And this is still nothing like a boom.
20
+
21
+ Now, I’m aware that the facts about our dwindling deficit are unwelcome in many quarters. Fiscal fearmongering is a major industry inside the Beltway, especially among those looking for excuses to do what they really want, namely dismantle Medicare, Medicaid and Social Security. People whose careers are heavily invested in the deficit-scold industry don’t want to let evidence undermine their scare tactics; as the deficit dwindles, we’re sure to encounter a blizzard of bogus numbers purporting to show that we’re still in some kind of fiscal crisis.
22
+
23
+ But we aren’t. The deficit is indeed dwindling, and the case for making the deficit a central policy concern, which was never very strong given low borrowing costs and high unemployment, has now completely vanished.
@@ -0,0 +1,27 @@
1
+ Four years ago, as a newly elected president began his efforts to rescue the economy and strengthen the social safety net, conservative economic pundits — people who claimed to understand markets and know how to satisfy them — warned of imminent financial disaster. Stocks, they declared, would plunge, while interest rates would soar.
2
+
3
+ Even a casual trawl through the headlines of the time turns up one dire pronouncement after another. “Obama’s radicalism is killing the Dow,” warned an op-ed article by Michael Boskin, an economic adviser to both Presidents Bush. “The disciplinarians of U.S. policy makers return,” declared The Wall Street Journal, warning that the “bond vigilantes” would soon push Treasury yields to destructive heights.
4
+
5
+ Sure enough, this week the Dow Jones industrial average has been hitting all-time highs, while the current yield on 10-year U.S. government bonds is roughly half what it was when The Journal published that screed.
6
+
7
+ O.K., everyone makes a bad prediction now and then. But these predictions have special significance, and not just because the people who made them have had such a remarkable track record of error these past several years.
8
+
9
+ No, the important point about these particular bad predictions is that they came from people who constantly invoke the potential wrath of the markets as a reason we must follow their policy advice. Don’t try to cover America’s uninsured, they told us; if you do, you will undermine business confidence and the stock market will tank. Don’t try to reform Wall Street, or even criticize its abuses; you’ll hurt the plutocrats’ feelings, and that will lead to plunging markets. Don’t try to fight unemployment with higher government spending; if you do, interest rates will skyrocket.
10
+
11
+ And, of course, do slash Social Security, Medicare and Medicaid right away, or the markets will punish you for your presumption.
12
+
13
+ By the way, I’m not just talking about the hard right; a fair number of self-proclaimed centrists play the same game. For example, two years ago, Erskine Bowles and Alan Simpson warned us to expect an attack of the bond vigilantes within, um, two years unless we adopted, you guessed it, Simpson-Bowles.
14
+
15
+ So what the bad predictions tell us is that we are, in effect, dealing with priests who demand human sacrifices to appease their angry gods — but who actually have no insight whatsoever into what those gods actually want, and are simply projecting their own preferences onto the alleged mind of the market.
16
+
17
+ What, then, are the markets actually telling us?
18
+
19
+ I wish I could say that it’s all good news, but it isn’t. Those low interest rates are the sign of an economy that is nowhere near to a full recovery from the financial crisis of 2008, while the high level of stock prices shouldn’t be cause for celebration; it is, in large part, a reflection of the growing disconnect between productivity and wages.
20
+
21
+ The interest-rate story is fairly simple. As some of us have been trying to explain for four years and more, the financial crisis and the bursting of the housing bubble created a situation in which almost all of the economy’s major players are simultaneously trying to pay down debt by spending less than their income. Since my spending is your income and your spending is my income, this means a deeply depressed economy. It also means low interest rates, because another way to look at our situation is, to put it loosely, that right now everyone wants to save and nobody wants to invest. So we’re awash in desired savings with no place to go, and those excess savings are driving down borrowing costs.
22
+
23
+ Under these conditions, of course, the government should ignore its short-run deficit and ramp up spending to support the economy. Unfortunately, policy makers have been intimidated by those false priests, who have convinced them that they must pursue austerity or face the wrath of the invisible market gods.
24
+
25
+ Meanwhile, about the stock market: Stocks are high, in part, because bond yields are so low, and investors have to put their money somewhere. It’s also true, however, that while the economy remains deeply depressed, corporate profits have staged a strong recovery. And that’s a bad thing! Not only are workers failing to share in the fruits of their own rising productivity, hundreds of billions of dollars are piling up in the treasuries of corporations that, facing weak consumer demand, see no reason to put those dollars to work.
26
+
27
+ So the message from the markets is by no means a happy one. What the markets are clearly saying, however, is that the fears and prejudices that have dominated Washington discussion for years are entirely misguided. And they’re also telling us that the people who have been feeding those fears and peddling those prejudices don’t have a clue about how the economy actually works.
@@ -0,0 +1,25 @@
1
+ Conservatives like to say that their position is all about economic freedom, and hence making government’s role in general, and government spending in particular, as small as possible. And no doubt there are individual conservatives who really have such idealistic motives.
2
+
3
+ When it comes to conservatives with actual power, however, there’s an alternative, more cynical view of their motivations — namely, that it’s all about comforting the comfortable and afflicting the afflicted, about giving more to those who already have a lot. And if you want a strong piece of evidence in favor of that cynical view, look at the current state of play over Medicaid.
4
+
5
+ Some background: Medicaid, which provides health insurance to lower-income Americans, is a highly successful program that’s about to get bigger, because an expansion of Medicaid is one key piece of the Affordable Care Act, a k a Obamacare.
6
+
7
+ There is, however, a catch. Last year’s Supreme Court decision upholding Obamacare also opened a loophole that lets states turn down the Medicaid expansion if they choose. And there has been a lot of tough talk from Republican governors about standing firm against the terrible, tyrannical notion of helping the uninsured.
8
+
9
+ Now, in the end most states will probably go along with the expansion because of the huge financial incentives: the federal government will pay the full cost of the expansion for the first three years, and the additional spending will benefit hospitals and doctors as well as patients. Still, some of the states grudgingly allowing the federal government to help their neediest citizens are placing a condition on this aid, insisting that it must be run through private insurance companies. And that tells you a lot about what conservative politicians really want.
10
+
11
+ Consider the case of Florida, whose governor, Rick Scott, made his personal fortune in the health industry. At one point, by the way, the company he built pleaded guilty to criminal charges, and paid $1.7 billion in fines related to Medicare fraud. Anyway, Mr. Scott got elected as a fierce opponent of Obamacare, and Florida participated in the suit asking the Supreme Court to declare the whole plan unconstitutional. Nonetheless, Mr. Scott recently shocked Tea Party activists by announcing his support for the Medicaid expansion.
12
+
13
+ But his support came with a condition: he was willing to cover more of the uninsured only after receiving a waiver that would let him run Medicaid through private insurance companies. Now, why would he want to do that?
14
+
15
+ Don’t tell me about free markets. This is all about spending taxpayer money, and the question is whether that money should be spent directly to help people or run through a set of private middlemen.
16
+
17
+ And despite some feeble claims to the contrary, privatizing Medicaid will end up requiring more, not less, government spending, because there’s overwhelming evidence that Medicaid is much cheaper than private insurance. Partly this reflects lower administrative costs, because Medicaid neither advertises nor spends money trying to avoid covering people. But a lot of it reflects the government’s bargaining power, its ability to prevent price gouging by hospitals, drug companies and other parts of the medical-industrial complex.
18
+
19
+ For there is a lot of price-gouging in health care — a fact long known to health care economists but documented especially graphically in a recent article in Time magazine. As Steven Brill, the article’s author, points out, individuals seeking health care can face incredible costs, and even large private insurance companies have limited ability to control profiteering by providers. Medicare does much better, and although Mr. Brill doesn’t point this out, Medicaid — which has greater ability to say no — seems to do better still.
20
+
21
+ You might ask why, in that case, much of Obamacare will run through private insurers. The answer is, raw political power. Letting the medical-industrial complex continue to get away with a lot of overcharging was, in effect, a price President Obama had to pay to get health reform passed. And since the reward was that tens of millions more Americans would gain insurance, it was a price worth paying.
22
+
23
+ But why would you insist on privatizing a health program that is already public, and that does a much better job than the private sector of controlling costs? The answer is pretty obvious: the flip side of higher taxpayer costs is higher medical-industry profits.
24
+
25
+ So ignore all the talk about too much government spending and too much aid to moochers who don’t deserve it. As long as the spending ends up lining the right pockets, and the undeserving beneficiaries of public largess are politically connected corporations, conservatives with actual power seem to like Big Government just fine.
@@ -0,0 +1,105 @@
1
+ Last time on FAIC I discussed the standard terminology used for the monad pattern: that the simple construction method is traditionally called "unit", and the function application method is traditionally called "bind". I also pointed out that the two sub-patterns you see most frequently in implementations of the monad pattern are first, association of some state with a value, and second, construction of workflows that describe the sequencing of units of work to be performed in the future. Today we'll look at that first kind of monad in some more detail.
2
+
3
+ We've been talking extensively about the nullable monad; it essentially associates a single Boolean value with the underlying value. If the Boolean value is true then the value behaves normally; if not, then the "nullness" of the value propagates through the system. 1 I won't labour that example more. Rather, let's look at two variations on the nullable monad:
4
+
5
+ struct Tainted<T>
6
+ {
7
+ public T Value { get; private set; }
8
+ public bool IsTainted { get; private set; }
9
+ private Tainted(T value, bool isTainted) : this()
10
+ {
11
+ this.Value = value;
12
+ this.IsTainted = isTainted;
13
+ }
14
+ public static Tainted<T> MakeTainted(T value)
15
+ {
16
+ return new Tainted<T>(value, true);
17
+ }
18
+ public static Tainted<T> MakeClean(T value)
19
+ {
20
+ return new Tainted<T>(value, false);
21
+ }
22
+ public static Tainted<R> Bind<A, R>(
23
+ Tainted<A> tainted, Func<A, Tainted<R>> function)
24
+ {
25
+ Tainted<R> result = function(tainted.Value);
26
+ if (tainted.IsTainted && !result.IsTainted)
27
+ return new Tainted<R>(result.Value, true);
28
+ else
29
+ return result;
30
+ }
31
+ }
32
+
33
+ (Note that I've started calling the bind function "Bind" rather than "ApplySpecialFunction".)
34
+
35
+ The semantics of this monad should be pretty clear. We associated a "taint" with a value, and any function that takes as its argument a "dirty" value results in an automatically dirty value. Or, put another way, the only way a function applied to an instance of this monad can produce a clean result is if the original value was clean and the function produced a clean value. You could use this monad to "amplify" a string so that you could determine if it had been checked for cross-site scripting attacks; strings are presumed dirty until proven clean, and any operation on a dirty string produces another dirty string.
36
+
37
+ This example was very simple; you might imagine that the "security" state stored here could be far more complex. Rather than a simple Boolean taint, the taint might be an identifier determining which client originated the data. Anything is possible.
38
+
39
+ Here's a second variation on the nullable monad:
40
+
41
+ struct NoThrow<T>
42
+ {
43
+ private T value;
44
+ public T Value
45
+ {
46
+ get
47
+ {
48
+ if (this.Exception != null)
49
+ throw this.Exception;
50
+ else
51
+ return this.value;
52
+ }
53
+ }
54
+ public Exception Exception { get; private set; }
55
+ private NoThrow(T value, Exception exception) : this()
56
+ {
57
+ this.value = value;
58
+ this.Exception = exception;
59
+ }
60
+ public NoThrow(T value) : this(value, null) {}
61
+ public static NoThrow<R> Bind<A, R>(
62
+ NoThrow<A> noThrow, Func<A, NoThrow<R>> function)
63
+ {
64
+ if (noThrow.Exception != null)
65
+ return new NoThrow<R>(default(R), noThrow.Exception);
66
+ R newValue;
67
+ try
68
+ {
69
+ newValue = function(noThrow.Value).Value;
70
+ }
71
+ catch(Exception ex)
72
+ {
73
+ return new NoThrow<R>(default(R), ex);
74
+ }
75
+ return new NoThrow<R>(newValue, null);
76
+ }
77
+ }
78
+
79
+ Again, the semantics should be pretty clear; this is a buffed-up version of the nullable monad, where a value is "null" if any operation that produced it threw an exception. This monad will be very familiar to anyone who has used the asynchronous monad Task<T> because of course it does much the same thing: if the asynchronous task throws an exception then the exception is stored in the task, and re-thrown when the value of the task is fetched.2
80
+
81
+ You can associate any kind of state whatsoever with a value using a monad. For example, for debugging purposes we might want to accumulate a log describing how it flowed through the program:
82
+
83
+ struct Logged<T>
84
+ {
85
+ public T Value { get; private set; }
86
+ public string Log { get; private set; }
87
+ public Logged(T value, string log) : this()
88
+ {
89
+ this.Value = value;
90
+ this.Log = log;
91
+ }
92
+ public Logged(T value) : this(value, null) {}
93
+ public Logged<T> AddLog(string newLog)
94
+ {
95
+ return new Logged<T>(this.Value, this.Log + newLog);
96
+ }
97
+ public static Logged<R> Bind<A, R>(
98
+ Logged<A> logged, Func<A, Logged<R>> function)
99
+ {
100
+ Logged<R> result = function(logged.Value);
101
+ return new Logged<R>(result.Value, logged.Log + result.Log);
102
+ }
103
+ }
104
+
105
+ These are just some of the simpler "associate data with a value" monads that are possible, but I suspect that you get the idea. Next time on FAIC we'll take a closer look at query expression syntax; it turns out to have a strong connection with monadic binding.
@@ -0,0 +1,17 @@
1
+ Though we've exchanged a few emails over the years, I've only met Joel Spolsky once, back in 2005, and since he was surrounded by a protective layer of adoring fanboys we didn't get to chat much. So it was a genuine pleasure to finally spend the better part of an hour chatting with Joel, David, Jay and Alex - them in New York, me in Seattle, Skype is a wonderful thing. If you like long, rambling conversations full of obscure facts about old programming languages, you could do worse than this podcast. The link to the podcast post is here.
2
+
3
+ A few supplemental links for some of the topics we cover:
4
+
5
+ The WATFIV FORTRAN compiler and the Waterloo BASIC chip that shipped on the SuperPET 9000 were technically not actually written at WATCOM because WATCOM wasn't a company until 1981. However, WATCOM was formed out of the Computer Systems Group at the University of Waterloo and inherited a bunch of the software written there.
6
+ I talked about IDispatch default property semantics here. I document some corner cases in VBScript default property semantics here. I mentioned that I left some detailed comments about those features in the VBScript IntelliSense code in Visual InterDev; you can read one of those comments here.
7
+ Some details on VBScript's wacky error handling mechanisms are here.
8
+ I mentioned that "soft" vs "hard" types change the comparison semantics of VBScript. Details here.
9
+ If you pass a variable containing an object to an out-only COM method from VBScript, the object will leak.
10
+ What do you mean, I can't use parentheses when calling a sub?
11
+ Whose Line? and StackOverflow have something in common: everything is made up and the points don't matter.
12
+ Answering questions is a great way to become a recognized expert.
13
+ I didn't mention this during the podcast, but if you want a good overview of what are the challenges of making a static analyzer that is good enough to sell for money, this article is excellent.
14
+ What was that throwaway line about programming on a boat all about?
15
+ My article on unsafe array covariance is here.
16
+ Joel On Turtles.
17
+ My post about me and fellow programmer Ryan Gosling.
@@ -0,0 +1,25 @@
1
+ Last time on FAIC we managed to finally state the rules of the monad pattern; of course, we've known for some time that the key parts of the pattern are the constructor helper, which we've been calling CreateSimpleM<T> and the function application helper, which we've been calling ApplySpecialFunction<A, R>. Needless to say, these are not the standard names for these functions. 1
2
+
3
+ The traditional name for the constructor function is "unit", which I suppose makes some sense. In Haskell, a purely functional programming language that makes heavy use of monads, the unit function is called return, for reasons which are a bit tricky to explain without a background in Haskell.2 In C# there is no particular standard. Of the five monadic types we've been using as examples, there are five different ways to construct a monad instance from a simple value:
4
+
5
+ Nullable<int> nullable = new Nullable<int>(123);
6
+ Task<int> task = Task.FromResult<int>(123);
7
+ Lazy<int> lazy = new Lazy<int>(() => 123);
8
+ OnDemand<int> onDemand = () => 123;
9
+ IEnumerable<int> sequence = Enumerable.Repeat<int>(123, 1);
10
+
11
+ And frankly, that last one is a bit dodgy. I wish there was a static method on Enumerable specifically for making a one-element sequence.
12
+
13
+ The traditional name for the "function application" helper is "bind". In Haskell the bind function is actually an infix3 operator; in Haskell to apply a function f to an instance of a monad m, you'd say m >>= f. In C# the bind function is usually not provided explicitly and therefore usually does not have a name.4
14
+
15
+ "Unit" makes some sense but what on earth could "bind" mean? And what's with that crazy Haskell syntax?
16
+
17
+ You might have noticed that the asynchronous, lazy, on-demand and sequence monads all have an interesting common property: when you apply a function to any of these monads, what you get back is an object that will perform that function in the future. Essentially, the bind function takes an immutable workflow and its subsequent step, and returns you the resulting new workflow. So m >>= f means "bind operation f onto the end of workflow m and give me the resulting new workflow". The Haskell syntax is actually quite appropriate; you get the sense that the workflow is feeding its result into the next function.
18
+
19
+ Let's be clear on this: the bind operation takes a workflow and a function and gives you back a new workflow that feeds the result of the original workflow into the new function when the new workflow is executed. The bind operator does not execute the workflow; it makes a new workflow out of an old one.5
20
+
21
+ How those workflows are actually executed depends on the semantics of the monad, of course. A portion of the workflow of the sequence monad is activated whenever MoveNext is called, and it executes until the next value in the sequence can be computed. The workflow of the lazy monad is activated the first time the Value property is fetched; after that, it uses the cached value. The workflow of the on-demand monad is activated by invoking the delegate. And the workflow of the asynchronous monad is activated... well, whenever the task is scheduled to execute!
22
+
23
+ That is the whole point of those particular monads: they represent a bunch of work that is to be done and the order in which to do it. By contrast, the far simpler nullable monad6 doesn't represent a workflow to be performed in the future. Rather, it represents the association of extra state -- a single Boolean -- with a value. Computations performed on it are done so "eagerly" rather than being deferred until the future.
24
+
25
+ Next time on FAIC: I'm the special guest on the StackExchange podcast, so we'll digress briefly. When we continue the series we'll come up with a few ad hoc examples of "state" monads to explore this concept further.
@@ -0,0 +1,102 @@
1
+ Way back in 1992 I was studying linear algebra at Waterloo. I just could not seem to wrap my head around dual spaces. Then one night I went to sleep after studying algebra for several hours, and I dreamed about dual spaces. When I awoke I had a clear and intuitive understanding of the concept. Apparently my brain had decided to sort it all out in my sleep. It was a bizarre experience that never happened again.1 History is full of examples of people who had sudden insights that solved tricky problems. The tragically short-lived mathematician Srinivasa Ramanujan claimed that he dreamed of vast scrolls of mathematics, most of which turned out to be both correct and strikingly original.
2
+
3
+ There is of course a difficulty with waiting for a solution to appear in a dream: you never know when that's going to happen. Since insight is unreliable, we've developed a far more reliable technique for solving tough problems: recursive divide and conquer. We solve problems the same way that a recursive method solves problems:
4
+
5
+ Is the current problem trivial? If so, solve it. Otherwise, break the current problem down into one or more smaller problems. Recursively solve the smaller problems and then compose those solutions into a solution to the larger problem.
6
+
7
+ It's "composition" that I want to talk about today. Composition is the act of combining two or more solutions to smaller problems into a single abstraction that solves a larger problem. We do this so often when writing computer programs, it's like the air we breathe. It's all around us but we don't think about it that often. Here we have a composition of two properties with an operator; the result of the composition is a third property:
8
+
9
+ public double Area
10
+ {
11
+ get { return this.Length * this.Width; }
12
+ }
13
+
14
+ And of course whatever Rectangle type this is has probably composed two values of Point type for the corners, which have in turn composed two double values for the coordinates, and so on. All the mechanisms of a modern, pragmatic programming language are there to make it easy to compose solutions to smaller problems into solutions of larger problems.
15
+
16
+ Thus there are an enormous number of different kinds of composition available to C# programmers. Today I want to talk about a very specific kind of composition: composition of non-void functions of one parameter. This is one of the most basic of compositions.2 As a silly illustrative example, if you have:
17
+
18
+ static long Cube(int x) { return (long)x * x * x; }
19
+ static double Halve(long y) { return y / 2.0; }
20
+
21
+ then you can always make a third function that composes these two:
22
+
23
+ static double HalveTheCube(int x) { return Halve(Cube(x)); }
24
+
25
+ Typically when we write programs, the program text itself describes a whole pile of compositions, each rather more complex than these simple function-of-one-parameter compositions. But we can also perform function compositions dynamically if we want to, using delegates:
26
+
27
+ Func<int, long> cube = x => (long)x * x * x;
28
+ Func<long, double> halve = y => y / 2.0;
29
+ Func<int, double> both = z => halve(cube(z));
30
+
31
+ And in fact, we could even make a method that does it for us:
32
+
33
+ static Func<X, Z> Compose<X, Y, Z>(
34
+ Func<X, Y> f,
35
+ Func<Y, Z> g)
36
+ {
37
+ return x => g(f(x));
38
+ }
39
+
40
+ And then we could say:
41
+
42
+ Func<int, long> cube = x => (long)x * x * x;
43
+ Func<long, double> halve = y => y / 2.0;
44
+ Func<int, double> both = Compose(cube, halve);
45
+
46
+ Of course you would never actually do that, because function composition has such a lovely syntax already in C#. But logically, this is what you are doing every time you write a program where the result of one function is fed into the next: you are composing the two functions into a third.
47
+
48
+ Notice that of course in order to be composed, the return type of the "inner" function must be implicitly convertible to the parameter type of the "outer" function. Which brings us back to the topic at hand: the final rule of the monad pattern for types. We've been talking about "special" functions that return an instance of a monadic type. Suppose we have two such functions:
49
+
50
+ Func<int, Nullable<double>> log = x => x > 0 ?
51
+ new Nullable<double>(Math.Log(x)) : new Nullable<double>();
52
+ Func<double, Nullable<decimal>> toDecimal = y => Math.Abs(y) < decimal.MaxValue :
53
+ new Nullable<decimal>((decimal)y) : new Nullable<decimal>();
54
+ Func<int, Nullable<decimal>> both = Compose(log, toDecimal);
55
+
56
+ That doesn't work. toDecimal takes a double, but log returns a Nullable<double>. What do we want to happen? Clearly we want to say that the result of the composed functions is null if log returns null, and otherwise passes the underlying value along to toDecimal. But we already have a function that does precisely that: ApplySpecialFunction! And therefore we can build a monadic composition helper:
57
+
58
+ static Func<X, Nullable<Z>> ComposeSpecial<X, Y, Z>(
59
+ Func<X, Nullable<Y>> f,
60
+ Func<Y, Nullable<Z>> g)
61
+ {
62
+ return x => ApplySpecialFunction(f(x), g);
63
+ }
64
+
65
+ Now we can say:
66
+
67
+ Func<int, Nullable<decimal>> both = ComposeSpecial(log, toDecimal);
68
+
69
+ The ApplySpecialFunction helper method enables us to apply any function to a monadic type, which is awesome. But in doing so it also enables us to compose any two functions that return that type!
70
+
71
+ I said last time that we were finally going to get to the last rule of the monad pattern, and at long last we've arrived. The last rule is: the ApplySpecialFunction helper must ensure that composition works. In code:
72
+
73
+ Func<X, M<Y>> f = whatever;
74
+ Func<Y, M<Z>> g = whatever;
75
+ M<X> mx = whatever;
76
+ M<Y> my = ApplySpecialFunction(mx, f);
77
+ M<Z> mz1 = ApplySpecialFunction(my, g);
78
+ Func<X, M<Z>> h = ComposeSpecial(f, g);
79
+ M<Z> mz2 = ApplySpecialFunction(mx, h);
80
+
81
+ We require that mz1 and mz2 be semantically the same. Applying f to some value and then applying g to the result must be logically the same as first composing f with g and then applying the composition to the value.3
82
+
83
+ Finally we've got all the small details taken care of and we can correctly describe the monad pattern in C#:
84
+
85
+ A monad is a generic type M<T> such that:
86
+
87
+ There is some sort of construction mechanism that takes a T and returns an M<T>. We've been characterizing this as a method with signature
88
+
89
+ static M<T> CreateSimpleM<T>(T t)
90
+
91
+ Also there is some way of applying a function that takes the underlying type to a monad of that type. We've been characterizing this as a method with signature:
92
+
93
+ static M<R> ApplySpecialFunction<A, R>(
94
+ M<A> monad, Func<A, M<R>> function)
95
+
96
+ Finally, both these methods must obey the monad laws, which are:
97
+
98
+ Applying the construction function to a given instance of the monad produces a logically identical instance of the monad.
99
+ Applying a function to the result of the construction function on a value, and applying that function to the value directly, produces two logically identical instances of the monad.
100
+ Applying to a value a first function followed by applying to the result a second function, and applying to the original value a third function that is the composition of the first and second functions, produces two logically identical instances of the monad.
101
+
102
+ Whew! And now perhaps you see why I started this series all those weeks ago with the idea of exploring the pattern by looking at examples, rather than starting in with the monad laws.
@@ -0,0 +1,92 @@
1
+ Last time in this series we finally worked out the actual rules for the monad pattern. The pattern in C# is that a monad is a generic type M<T> that "amplifies" the power of a type T. There is always a way to construct an M<T> from a value of T, which we characterized as the existence of a helper method:
2
+
3
+ static M<T> CreateSimpleM<T>(T t)
4
+
5
+ And if you have a function that takes any type A and produces an M<R> then there is a way to apply that function to an instance of M<A> in a way that still produces an M<R>. We characterized this as the existence of a helper method:
6
+
7
+ static M<R> ApplySpecialFunction<A, R>(
8
+ M<A> wrapped,
9
+ Func<A, M<R>> function)
10
+
11
+ Is that it? Not quite. In order to actually be a valid implementation of the monad pattern, these two helper methods need to have a few additional restrictions placed on them, to ensure that they are well-behaved. Specifically: the construction helper function can be thought of as "wrapping up" a value, and the application helper function knows how to "unwrap" a value; it seems reasonable that we require that wrapping and unwrapping operations preserve the value.
12
+
13
+ With that in mind, we notice that ApplySpecialFunction takes as its second argument a function from A to M<R>, for any A and any R. But CreateSimpleM is a function from T to M<T>, and is therefore a possible argument to ApplySpecialFunction! Suppose we have: 1
14
+
15
+ static Nullable<T> CreateSimpleNullable<T>(T t)
16
+ {
17
+ return new Nullable<T>(t);
18
+ }
19
+ static Nullable<R> ApplySpecialFunction<A, R>(
20
+ Nullable<A> nullable,
21
+ Func<A, Nullable<R>> function)
22
+ {
23
+ return nullable.HasValue ?
24
+ function(nullable.Value) :
25
+ new Nullable<R>();
26
+ }
27
+
28
+ And then we notice that CreateSimpleNullable has the correct signature to be passed as the second argument to ApplySpecialFunction:
29
+
30
+ Nullable<int> original = Whatever();
31
+ Nullable<int> result = ApplySpecialFunction(original, CreateSimpleNullable);
32
+
33
+ Work your way through what happens here. If original is null then we get null back out. If original has a value, say, 12, then we unwrap it, pass it to CreateSimpleNullable, and get a wrapped 12 back out! The rule is:
34
+
35
+ Applying the "make a simple wrapper around this value" function to a monad value must produce the same monad value.
36
+
37
+ And in this case we actually have value identity. Now, I note that we are not requiring referential identity here, should the monadic type happen to be a reference type. Let's consider our OnDemand<T> monad:
38
+
39
+ static OnDemand<T> CreateSimpleOnDemand<T>(T t)
40
+ {
41
+ return () => t;
42
+ }
43
+ static OnDemand<R> ApplySpecialFunction<A, R>(
44
+ OnDemand<A> onDemand,
45
+ Func<A, OnDemand<R>> function)
46
+ {
47
+ return ()=>function(onDemand())();
48
+ }
49
+
50
+ If we have
51
+
52
+ OnDemand<int> original = () => DateTime.Now.Seconds;
53
+ OnDemand<int> result = ApplySpecialFunction(original, CreateSimpleOnDemand);
54
+
55
+ Then original and result are certainly not reference equal. But both original and result do the same thing: when you call them, they tell you what the current second is. The latter unfortunately jumps through several unnecessary hoops in order to do so, but it gets there in the end.
56
+
57
+ In some implementations of the monad pattern it might be cheap and easy to ensure that the two instances be referentially identical, and obviously that would be great. But all that is actually required is that the original and resulting instances be semantically identical when the simple construction function is applied to an existing monad.
58
+
59
+ The next restriction is that the "simple wrapper around a value" actually act like a simple wrapper around a value. But how can that be precisely characterized? Easily enough with the two helper methods we have. Let's look at an example. Recall that we had a SafeLog method:
60
+
61
+ static Nullable<double> SafeLog(int value) { ... }
62
+
63
+ Now suppose we had:
64
+
65
+ int original = 123;
66
+ Nullable<double> result1 = SafeLog(original);
67
+ Nullable<int> nullable = CreateSimpleNullable(original);
68
+ Nullable<double> result2 = ApplySpecialFunction(nullable, SafeLog);
69
+
70
+ You would expect that result1 and result2 would be the same nullable double, right? If Nullable<int> is just a simple wrapper around an int then applying a function to it should be just the same as applying the function to the original integer value. We can generalize this rule and say that the result of applying a special function to a value, and to the same value "wrapped up in a monad", must be the same. 2
71
+
72
+ OK, so let's once again sum up. The rules of the monad pattern are that a monadic type M<T> provides operations that are logically equivalent to methods:
73
+
74
+ static M<T> CreateSimpleM<T>(T t) { ... }
75
+ static M<R> ApplySpecialFunction<A, R>(
76
+ M<A> monad, Func<A, M<R>> function) {...}
77
+
78
+ subject to the restrictions that:
79
+
80
+ ApplySpecialFunction(someMonadValue, CreateSimpleM)
81
+
82
+ results in a value logically identical to someMonadValue, and that
83
+
84
+ ApplySpecialFunction(CreateSimpleM(someValue), someFunction)
85
+
86
+ results in a value logically identical to
87
+
88
+ someFunction(someValue)
89
+
90
+ Are we done yet? Please?
91
+
92
+ Sigh. No. We are still missing one rule of the monad pattern but I promise, this is the last one. Next time on FAIC we'll discuss the nature of programming, and, for that matter, all problem solving, and then see how the monad pattern fits into that. Along the way we'll deduce the last rule.
@@ -0,0 +1,35 @@
1
+ A core, not side, effect of technology is its ability to magnify power and multiply force -- for both attackers and defenders. One side creates ceramic handguns, laser-guided missiles, and new-identity theft techniques, while the other side creates anti-missile defense systems, fingerprint databases, and automatic facial recognition systems.
2
+
3
+ The problem is that it's not balanced: Attackers generally benefit from new security technologies before defenders do. They have a first-mover advantage. They're more nimble and adaptable than defensive institutions like police forces. They're not limited by bureaucracy, laws, or ethics. They can evolve faster. And entropy is on their side -- it's easier to destroy something than it is to prevent, defend against, or recover from that destruction.
4
+
5
+ For the most part, though, society still wins. The bad guys simply can't do enough damage to destroy the underlying social system. The question for us is: can society still maintain security as technology becomes more advanced?
6
+
7
+ I don't think it can.
8
+
9
+ Because the damage attackers can cause becomes greater as technology becomes more powerful. Guns become more harmful, explosions become bigger, malware becomes more pernicious...and so on. A single attacker, or small group of attackers, can cause more destruction than ever before.
10
+
11
+ This is exactly why the whole post-9/11 weapons-of-mass-destruction debate was so overwrought: Terrorists are scary, terrorists flying airplanes into buildings are even scarier, and the thought of a terrorist with a nuclear bomb is absolutely terrifying.
12
+
13
+ As the destructive power of individual actors and fringe groups increases, so do the calls for -- and society's acceptance of -- increased security.
14
+
15
+ Traditional security largely works "after the fact". We tend not to ban or restrict the objects that can do harm; instead, we punish the people who do harm with objects. There are exceptions, of course, but they're exactly that: exceptions. This system works as long as society can tolerate the destructive effects of those objects (for example, allowing people to own baseball bats and arresting them after they use them in a riot is only viable if society can tolerate the potential for riots).
16
+
17
+ When that isn't enough, we resort to "before-the-fact" security measures. These come in two basic varieties: general surveillance of people in an effort to stop them before they do damage, and specific interdictions in an effort to stop people from using those technologies to do damage.
18
+
19
+ But these measures work better at keeping dangerous technologies out of the hands of amateurs than at keeping them out of the hands of professionals.
20
+
21
+ And in the global interconnected world we live in, they're not anywhere close to foolproof. Still, a climate of fear causes governments to try. Lots of technologies are already restricted: entire classes of drugs, entire classes of munitions, explosive materials, biological agents. There are age restrictions on vehicles and training restrictions on complex systems like aircraft. We're already almost entirely living in a surveillance state, though we don't realize it or won't admit it to ourselves. This will only get worse as technology advances… today's Ph.D. theses are tomorrow's high-school science-fair projects.
22
+
23
+ Increasingly, broad prohibitions on technologies, constant ubiquitous surveillance, and Minority Report-like preemptive security will become the norm. We can debate the effectiveness of various security measures in different circumstances. But the problem isn't that these security measures won't work -- even as they shred our freedoms and liberties -- it's that no security is perfect.
24
+
25
+ Because sooner or later, the technology will exist for a hobbyist to explode a nuclear weapon, print a lethal virus from a bio-printer, or turn our electronic infrastructure into a vehicle for large-scale murder. We'll have the technology eventually to annihilate ourselves in great numbers, and sometime after, that technology will become cheap enough to be easy.
26
+
27
+ As it gets easier for one member of a group to destroy the entire group, and the group size gets larger, the odds of someone in the group doing it approaches certainty. Our global interconnectedness means that our group size encompasses everyone on the planet, and since government hasn't kept up, we have to worry about the weakest-controlled member of the weakest-controlled country. Is this a fundamental limitation of technological advancement, one that could end civilization? First our fears grip us so strongly that, thinking about the short term, we willingly embrace a police state in a desperate attempt to keep us safe; then, someone goes off and destroys us anyway?
28
+
29
+ If security won't work in the end, what is the solution?
30
+
31
+ Resilience -- building systems able to survive unexpected and devastating attacks -- is the best answer we have right now. We need to recognize that large-scale attacks will happen, that society can survive more than we give it credit for, and that we can design systems to survive these sorts of attacks. Calling terrorism an existential threat is ridiculous in a country where more people die each month in car crashes than died in the 9/11 terrorist attacks.
32
+
33
+ If the U.S. can survive the destruction of an entire city -- witness New Orleans after Hurricane Katrina or even New York after Sandy -- we need to start acting like it, and planning for it. Still, it's hard to see how resilience buys us anything but additional time. Technology will continue to advance, and right now we don't know how to adapt any defenses -- including resilience -- fast enough.
34
+
35
+ We need a more flexible and rationally reactive approach to these problems and new regimes of trust for our information-interconnected world. We're going to have to figure this out if we want to survive, and I'm not sure how many decades we have left.
@@ -0,0 +1,25 @@
1
+ For technology that was supposed to ignore borders, bring the world closer together, and sidestep the influence of national governments, the Internet is fostering an awful lot of nationalism right now. We've started to see increased concern about the country of origin of IT products and services; U.S. companies are worried about hardware from China; European companies are worried about cloud services in the U.S; no one is sure whether to trust hardware and software from Israel; Russia and China might each be building their own operating systems out of concern about using foreign ones.
2
+
3
+ I see this as an effect of all the cyberwar saber-rattling that's going on right now. The major nations of the world are in the early years of a cyberwar arms race, and we're all being hurt by the collateral damage.
4
+
5
+ A commentator on Al Jazeera makes a similar point.
6
+
7
+ Our nationalist worries have recently been fueled by a media frenzy surrounding attacks from China. These attacks aren't new -- cyber-security experts have been writing about them for at least a decade, and the popular media reported about similar attacks in 2009 and again in 2010 -- and the current allegations aren't even very different than what came before. This isn't to say that the Chinese attacks aren't serious. The country's espionage campaign is sophisticated, and ongoing. And because they're in the news, people are understandably worried about them.
8
+
9
+ But it's not just China. International espionage works in both directions, and I'm sure we are giving just as good as we're getting. China is certainly worried about the U.S. Cyber Command's recent announcement that it was expanding from 900 people to almost 5,000, and the NSA's massive new data center in Utah. The U.S. even admits that it can spy on non-U.S. citizens freely.
10
+
11
+ The fact is that governments and militaries have discovered the Internet; everyone is spying on everyone else, and countries are ratcheting up offensive actions against other countries.
12
+
13
+ At the same time, many nations are demanding more control over the Internet within their own borders. They reserve the right to spy and censor, and to limit the ability of others to do the same. This idea is now being called the "cyber sovereignty movement," and gained traction at the International Telecommunications Union meeting last December in Dubai. One analyst called that meeting the "Internet Yalta," where the Internet split between liberal-democratic and authoritarian countries. I don't think he's exaggerating.
14
+
15
+ Not that this is new, either. Remember 2010, when the governments of the UAE, Saudi Arabia, and India demanded that RIM give them the ability to spy on BlackBerry PDAs within their borders? Or last year, when Syria used the Internet to surveil its dissidents? Information technology is a surprisingly powerful tool for oppression: not just surveillance, but censorship and propaganda as well. And countries are getting better at using that tool.
16
+
17
+ But remember: none of this is cyberwar. It's all espionage, something that's been going on between countries ever since countries were invented. What moves public opinion is less the facts and more the rhetoric, and the rhetoric of war is what we're hearing.
18
+
19
+ The result of all this saber-rattling is a severe loss of trust, not just amongst nation-states but between people and nation-states. We know we're nothing more than pawns in this game, and we figure we'll be better off sticking with our own country.
20
+
21
+ Unfortunately, both the reality and the rhetoric play right into the hands of the military and corporate interests that are behind the cyberwar arms race in the first place. There is an enormous amount of power at stake here: not only power within governments and militaries, but power and profit amongst the corporations that supply the tools and infrastructure for cyber-attack and cyber-defense. The more we believe we are "at war" and believe the jingoistic rhetoric, the more willing we are to give up our privacy, freedoms, and control over how the Internet is run.
22
+
23
+ Arms races are fueled by two things: ignorance and fear. We don't know the capabilities of the other side, and we fear that they are more capable than we are. So we spend more, just in case. The other side, of course, does the same. That spending will result in more cyber weapons for attack and more cyber-surveillance for defense. It will result in more government control over the protocols of the Internet, and less free-market innovation over the same. At its worst, we might be about to enter an information-age Cold War: one with more than two "superpowers." Aside from this being a bad future for the Internet, this is inherently destabilizing. It's just too easy for this amount of antagonistic power and advanced weaponry to get used: for a mistaken attribution to be reacted to with a counterattack, for a misunderstanding to become a cause for offensive action, or for a minor skirmish to escalate into a full-fledged cyberwar.
24
+
25
+ Nationalism is rife on the Internet, and it's getting worse. We need to damp down the rhetoric and -- more importantly -- stop believing the propaganda from those who profit from this Internet nationalism. Those who are beating the drums of cyberwar don't have the best interests of society, or the Internet, at heart.
@@ -0,0 +1,23 @@
1
+ Should companies spend money on security awareness training for their employees? It's a contentious topic, with respected experts on both sides of the debate. I personally believe that training users in security is generally a waste of time, and that the money can be spent better elsewhere. Moreover, I believe that our industry's focus on training serves to obscure greater failings in security design.
2
+
3
+ In order to understand my argument, it's useful to look at training's successes and failures. One area where it doesn't work very well is health. We are forever trying to train people to have healthier lifestyles: eat better, exercise more, whatever. And people are forever ignoring the lessons. One basic reason is psychological: we just aren't very good at trading off immediate gratification for long-term benefit. A healthier you is an abstract eventuality; sitting in front of the television all afternoon with a McDonald's Super Monster Meal sounds really good right now. Similarly, computer security is an abstract benefit that gets in the way of enjoying the Internet. Good practices might protect me from a theoretical attack at some time in the future, but they're a lot of bother right now and I have more fun things to think about. This is the same trick Facebook uses to get people to give away their privacy; no one reads through new privacy policies; it's much easier to just click "OK" and start chatting with your friends. In short: security is never salient.
4
+
5
+ Another reason health training works poorly is that it's hard to link behaviors with benefits. We can train anyone -- even laboratory rats -- with a simple reward mechanism: push the button, get a food pellet. But with health, the connection is more abstract. If you're unhealthy, what caused it? It might have been something you did or didn't do years ago, it might have been one of the dozen things you have been doing and not doing for months, or it might have been the genes you were born with. Computer security is a lot like this, too.
6
+
7
+ Training laypeople in pharmacology also isn't very effective. We expect people to make all sorts of medical decisions at the drugstore, and they're not very good at it. Turns out that it's hard to teach expertise. We can't expect every mother to have the knowledge of a doctor or pharmacist or RN, and we certainly can't expect her to become an expert when most of the advice she's exposed to comes from manufacturers' advertising. In computer security, too, a lot of advice comes from companies with products and services to sell.
8
+
9
+ One area of health that is a training success is HIV prevention. HIV may be very complicated, but the rules for preventing it are pretty simple. And aside from certain sub-Saharan countries, we have taught people a new model of their health, and have dramatically changed their behavior. This is important: most lay medical expertise stems from folk models of health. Similarly, people have folk models of computer security. Maybe they're right and maybe they're wrong, but they're how people organize their thinking. This points to a possible way that computer security training can succeed. We should stop trying to teach expertise, and pick a few simple metaphors of security and train people to make decisions using those metaphors.
10
+
11
+ On the other hand, we still have trouble teaching people to wash their hands -- even though it's easy, fairly effective, and simple to explain. Notice the difference, though. The risks of catching HIV are huge, and the cause of the security failure is obvious. The risks of not washing your hands are low, and it's not easy to tie the resultant disease to a particular not-washing decision. Computer security is more like hand washing than HIV.
12
+
13
+ Another area where training works is driving. We trained, either through formal courses or one-on-one tutoring, and passed a government test, to be allowed to drive a car. One reason that works is because driving is a near-term, really cool, obtainable goal. Another reason is even though the technology of driving has changed dramatically over the past century, that complexity has been largely hidden behind a fairly static interface. You might have learned to drive thirty years ago, but that knowledge is still relevant today. On the other hand, password advice from ten years ago isn't relevant today. Can I bank from my browser? Are PDFs safe? Are untrusted networks okay? Is JavaScript good or bad? Are my photos more secure in the cloud or on my own hard drive? The 'interface' we use to interact with computers and the Internet changes all the time, along with best practices for computer security. This makes training a lot harder.
14
+
15
+ Food safety is my final example. We have a bunch of simple rules -- cooking temperatures for meat, expiration dates on refrigerated goods, the three-second rule for food being dropped on the floor -- that are mostly right, but often ignored. If we can't get people to follow these rules, what hope do we have for computer security training?
16
+
17
+ To those who think that training users in security is a good idea, I want to ask: "Have you ever met an actual user?" They're not experts, and we can't expect them to become experts. The threats change constantly, the likelihood of failure is low, and there is enough complexity that it's hard for people to understand how to connect their behavior to eventual outcomes. So they turn to folk remedies that, while simple, don't really address the threats.
18
+
19
+ Even if we could invent an effective computer security training program, there's one last problem. HIV prevention training works because affecting what the average person does is valuable. Even if only half the population practices safe sex, those actions dramatically reduce the spread of HIV. But computer security is often only as strong as the weakest link. If four-fifths of company employees learn to choose better passwords, or not to click on dodgy links, one-fifth still get it wrong and the bad guys still get in. As long as we build systems that are vulnerable to the worst case, raising the average case won't make them more secure.
20
+
21
+ The whole concept of security awareness training demonstrates how the computer industry has failed. We should be designing systems that won't let users choose lousy passwords and don't care what links a user clicks on. We should be designing systems that conform to their folk beliefs of security, rather than forcing them to learn new ones. Microsoft has a great rule about system messages that require the user to make a decision. They should be NEAT: necessary, explained, actionable, and tested. That's how we should be designing security interfaces. And we should be spending money on security training for developers. These are people who can be taught expertise in a fast-changing environment, and this is a situation where raising the average behavior increases the security of the overall system.
22
+
23
+ If we security engineers do our job right, users will get their awareness training informally and organically, from their colleagues and friends. People will learn the correct folk models of security, and be able to make decisions using them. Then maybe an organization can spend an hour a year reminding their employees what good security means at that organization, both on the computer and off. That makes a whole lot more sense.
@@ -0,0 +1,33 @@
1
+ I'm going to start with three data points.
2
+
3
+ One: Some of the Chinese military hackers who were implicated in a broad set of attacks against the U.S. government and corporations were identified because they accessed Facebook from the same network infrastructure they used to carry out their attacks.
4
+
5
+ Two: Hector Monsegur, one of the leaders of the LulzSec hacker movement, was identified and arrested last year by the FBI. Although he practiced good computer security and used an anonymous relay service to protect his identity, he slipped up.
6
+
7
+ And three: Paula Broadwell, who had an affair with CIA director David Petraeus, similarly took extensive precautions to hide her identity. She never logged in to her anonymous e-mail service from her home network. Instead, she used hotel and other public networks when she e-mailed him. The FBI correlated hotel registration data from several different hotels -- and hers was the common name.
8
+
9
+ The Internet is a surveillance state. Whether we admit it to ourselves or not, and whether we like it or not, we're being tracked all the time. Google tracks us, both on its pages and on other pages it has access to. Facebook does the same; it even tracks non-Facebook users. Apple tracks us on our iPhones and iPads. One reporter used a tool called Collusion to track who was tracking him; 105 companies tracked his Internet use during one 36-hour period.
10
+
11
+ Increasingly, what we do on the Internet is being combined with other data about us. Unmasking Broadwell's identity involved correlating her Internet activity with her hotel stays. Everything we do now involves computers, and computers produce data as a natural by-product. Everything is now being saved and correlated, and many big-data companies make money by building up intimate profiles of our lives from a variety of sources.
12
+
13
+ Facebook, for example, correlates your online behavior with your purchasing habits offline. And there's more. There's location data from your cell phone, there's a record of your movements from closed-circuit TVs.
14
+
15
+ This is ubiquitous surveillance: All of us being watched, all the time, and that data being stored forever. This is what a surveillance state looks like, and it's efficient beyond the wildest dreams of George Orwell.
16
+
17
+ Sure, we can take measures to prevent this. We can limit what we search on Google from our iPhones, and instead use computer web browsers that allow us to delete cookies. We can use an alias on Facebook. We can turn our cell phones off and spend cash. But increasingly, none of it matters.
18
+
19
+ There are simply too many ways to be tracked. The Internet, e-mail, cell phones, web browsers, social networking sites, search engines: these have become necessities, and it's fanciful to expect people to simply refuse to use them just because they don't like the spying, especially since the full extent of such spying is deliberately hidden from us and there are few alternatives being marketed by companies that don't spy.
20
+
21
+ This isn't something the free market can fix. We consumers have no choice in the matter. All the major companies that provide us with Internet services are interested in tracking us. Visit a website and it will almost certainly know who you are; there are lots of ways to be tracked without cookies. Cellphone companies routinely undo the web's privacy protection. One experiment at Carnegie Mellon took real-time videos of students on campus and was able to identify one-third of them by comparing their photos with publicly available tagged Facebook photos.
22
+
23
+ Maintaining privacy on the Internet is nearly impossible. If you forget even once to enable your protections, or click on the wrong link, or type the wrong thing, you've permanently attached your name to whatever anonymous service you're using. Monsegur slipped up once, and the FBI got him. If the director of the CIA can't maintain his privacy on the Internet, we've got no hope.
24
+
25
+ In today's world, governments and corporations are working together to keep things that way. Governments are happy to use the data corporations collect -- occasionally demanding that they collect more and save it longer -- to spy on us. And corporations are happy to buy data from governments. Together the powerful spy on the powerless, and they're not going to give up their positions of power, despite what the people want.
26
+
27
+ Fixing this requires strong government will, but they're just as punch-drunk on data as the corporations. Slap-on-the-wrist fines notwithstanding, no one is agitating for better privacy laws.
28
+
29
+ So, we're done. Welcome to a world where Google knows exactly what sort of porn you all like, and more about your interests than your spouse does. Welcome to a world where your cell phone company knows exactly where you are all the time. Welcome to the end of private conversations, because increasingly your conversations are conducted by e-mail, text, or social networking sites.
30
+
31
+ And welcome to a world where all of this, and everything else that you do or is done on a computer, is saved, correlated, studied, passed around from company to company without your knowledge or consent; and where the government accesses it at will without a warrant.
32
+
33
+ Welcome to an Internet without privacy, and we've ended up here with hardly a fight.
@@ -0,0 +1,36 @@
1
+ A core, not side, effect of technology is its ability to magnify power and multiply force—for both attackers and defenders. One side creates ceramic handguns, laser-guided missiles, and new-identity theft techniques, while the other side creates anti-missile defense systems, fingerprint databases, and automatic facial recognition systems.
2
+
3
+ The problem is that it's not balanced: Attackers generally benefit from new security technologies before defenders do. They have a first-mover advantage. They're more nimble and adaptable than defensive institutions like police forces. They're not limited by bureaucracy, laws, or ethics. They can evolve faster. And entropy is on their side—it's easier to destroy something than it is to prevent, defend against, or recover from that destruction.
4
+
5
+ For the most part, though, society still wins. The bad guys simply can't do enough damage to destroy the underlying social system. The question for us is: can society still maintain security as technology becomes more advanced?
6
+
7
+ I don't think it can.
8
+
9
+ Because the damage attackers can cause becomes greater as technology becomes more powerful. Guns become more harmful, explosions become bigger, malware becomes more pernicious… and so on. A single attacker, or small group of attackers, can cause more destruction than ever before.
10
+
11
+ This is exactly why the whole post-9/11 weapons-of-mass-destruction debate was so overwrought: Terrorists are scary, terrorists flying airplanes into buildings are even scarier, and the thought of a terrorist with a nuclear bomb is absolutely terrifying.
12
+
13
+ As the destructive power of individual actors and fringe groups increases, so do the calls for—and society's acceptance of—increased security.
14
+ Rethinking Security
15
+
16
+ Traditional security largely works "after the fact". We tend not to ban or restrict the objects that can do harm; instead, we punish the people who do harm with objects. There are exceptions, of course, but they're exactly that: exceptions. This system works as long as society can tolerate the destructive effects of those objects (for example, allowing people to own baseball bats and arresting them after they use them in a riot is only viable if society can tolerate the potential for riots).
17
+
18
+ When that isn't enough, we resort to "before-the-fact" security measures. These come in two basic varieties: general surveillance of people in an effort to stop them before they do damage, and specific interdictions in an effort to stop people from using those technologies to do damage.
19
+
20
+ But these measures work better at keeping dangerous technologies out of the hands of amateurs than at keeping them out of the hands of professionals.
21
+
22
+ And in the global interconnected world we live in, they're not anywhere close to foolproof. Still, a climate of fear causes governments to try. Lots of technologies are already restricted: entire classes of drugs, entire classes of munitions, explosive materials, biological agents. There are age restrictions on vehicles and training restrictions on complex systems like aircraft. We're already almost entirely living in a surveillance state, though we don't realize it or won't admit it to ourselves. This will only get worse as technology advances… today's Ph.D. theses are tomorrow's high-school science-fair projects.
23
+
24
+ Increasingly, broad prohibitions on technologies, constant ubiquitous surveillance, and Minority Report-like preemptive security will become the norm. We can debate the effectiveness of various security measures in different circumstances. But the problem isn't that these security measures won't work—even as they shred our freedoms and liberties—it's that no security is perfect.
25
+
26
+ Because sooner or later, the technology will exist for a hobbyist to explode a nuclear weapon, print a lethal virus from a bio-printer, or turn our electronic infrastructure into a vehicle for large-scale murder. We'll have the technology eventually to annihilate ourselves in great numbers, and sometime after, that technology will become cheap enough to be easy.
27
+
28
+ As it gets easier for one member of a group to destroy the entire group, and the group size gets larger, the odds of someone in the group doing it approaches certainty. Our global interconnectedness means that our group size encompasses everyone on the planet, and since government hasn't kept up, we have to worry about the weakest-controlled member of the weakest-controlled country. Is this a fundamental limitation of technological advancement, one that could end civilization? First our fears grip us so strongly that, thinking about the short term, we willingly embrace a police state in a desperate attempt to keep us safe; then, someone goes off and destroys us anyway?
29
+
30
+ If security won't work in the end, what is the solution?
31
+
32
+ Resilience—building systems able to survive unexpected and devastating attacks—is the best answer we have right now. We need to recognize that large-scale attacks will happen, that society can survive more than we give it credit for, and that we can design systems to survive these sorts of attacks. Calling terrorism an existential threat is ridiculous in a country where more people die each month in car crashes than died in the 9/11 terrorist attacks.
33
+
34
+ If the U.S. can survive the destruction of an entire city—witness New Orleans after Hurricane Katrina or even New York after Sandy—we need to start acting like it, and planning for it. Still, it's hard to see how resilience buys us anything but additional time. Technology will continue to advance, and right now we don't know how to adapt any defenses—including resilience—fast enough.
35
+
36
+ We need a more flexible and rationally reactive approach to these problems and new regimes of trust for our information-interconnected world. We're going to have to figure this out if we want to survive, and I'm not sure how many decades we have left.
@@ -0,0 +1,28 @@
1
+ require 'pathname'
2
+ require 'test/unit'
3
+
4
+ dir = Pathname.new File.expand_path(File.dirname(__FILE__))
5
+ require dir + '..' + 'lib' + 'bayes_naive_jdp'
6
+
7
+ DOCUMENT_PATH = dir + 'documents'
8
+ CONFIDENCE_MIN = 0.98
9
+
10
+ class AuthorClassificationTest < Test::Unit::TestCase
11
+ def test_blog_author_classification
12
+ authors = ['lippert','schneier','krugman']
13
+ classifier = BayesNaiveJdp::Classifier.new
14
+ authors.each do |author|
15
+ 4.times do |i|
16
+ file = DOCUMENT_PATH.join("#{author}-#{i+1}")
17
+ classifier.train(File.open(file).read, author)
18
+ end
19
+ end
20
+
21
+ authors.each do |author|
22
+ file = DOCUMENT_PATH.join("#{author}-5")
23
+ answer = classifier.classify(File.open(file).read)
24
+ assert_equal(answer[:winner][:classification],author)
25
+ assert(answer[:winner][:confidence] >= CONFIDENCE_MIN)
26
+ end
27
+ end
28
+ end
metadata ADDED
@@ -0,0 +1,127 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: bayes_naive_jdp
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Jason Pollentier
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-08-18 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: bundler
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: '1.3'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: '1.3'
30
+ - !ruby/object:Gem::Dependency
31
+ name: rake
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ description: ! "A very simple naive Bayesian classifier. \n \tI'm just using it as
47
+ practice as I learn how to package ruby code.\n\tThe algorithm used here is not
48
+ original, but an adaptation from Burak Kanber's \n\tMachine Learning in Javascript
49
+ series.\n\n\thttp://readable.cc/feed/view/34236/burak-kanber-s-blog\t\n"
50
+ email:
51
+ - pollentj@gmail.com
52
+ executables: []
53
+ extensions: []
54
+ extra_rdoc_files: []
55
+ files:
56
+ - .gitignore
57
+ - Gemfile
58
+ - LICENSE.txt
59
+ - README.md
60
+ - Rakefile
61
+ - bayes_naive_jdp.gemspec
62
+ - lib/bayes_naive_jdp.rb
63
+ - lib/bayes_naive_jdp/version.rb
64
+ - test/documents/krugman-1
65
+ - test/documents/krugman-2
66
+ - test/documents/krugman-3
67
+ - test/documents/krugman-4
68
+ - test/documents/krugman-5
69
+ - test/documents/lippert-1
70
+ - test/documents/lippert-2
71
+ - test/documents/lippert-3
72
+ - test/documents/lippert-4
73
+ - test/documents/lippert-5
74
+ - test/documents/schneier-1
75
+ - test/documents/schneier-2
76
+ - test/documents/schneier-3
77
+ - test/documents/schneier-4
78
+ - test/documents/schneier-5
79
+ - test/test_author_classification.rb
80
+ homepage: ''
81
+ licenses:
82
+ - MIT
83
+ post_install_message:
84
+ rdoc_options: []
85
+ require_paths:
86
+ - lib
87
+ required_ruby_version: !ruby/object:Gem::Requirement
88
+ none: false
89
+ requirements:
90
+ - - ! '>='
91
+ - !ruby/object:Gem::Version
92
+ version: '0'
93
+ segments:
94
+ - 0
95
+ hash: 3893581627066365178
96
+ required_rubygems_version: !ruby/object:Gem::Requirement
97
+ none: false
98
+ requirements:
99
+ - - ! '>='
100
+ - !ruby/object:Gem::Version
101
+ version: '0'
102
+ segments:
103
+ - 0
104
+ hash: 3893581627066365178
105
+ requirements: []
106
+ rubyforge_project:
107
+ rubygems_version: 1.8.25
108
+ signing_key:
109
+ specification_version: 3
110
+ summary: A very simple naive Bayesian classifier.
111
+ test_files:
112
+ - test/documents/krugman-1
113
+ - test/documents/krugman-2
114
+ - test/documents/krugman-3
115
+ - test/documents/krugman-4
116
+ - test/documents/krugman-5
117
+ - test/documents/lippert-1
118
+ - test/documents/lippert-2
119
+ - test/documents/lippert-3
120
+ - test/documents/lippert-4
121
+ - test/documents/lippert-5
122
+ - test/documents/schneier-1
123
+ - test/documents/schneier-2
124
+ - test/documents/schneier-3
125
+ - test/documents/schneier-4
126
+ - test/documents/schneier-5
127
+ - test/test_author_classification.rb